## Working on integration with PCVAE

### Simplified code from "Regularized linear autoencoders recover the principal components, eventually"

##### Compare to: https://github.com/XuchanBao/linear-ae/
###### Here, I have simplfiied her code for the linear autoencoder with rotation and linear nested dropout autoencoder with expectation.
###### The test runs use optimal parameters from the WandB sweep, and the output loss, distance to subspace, and distance to alignment with true singular vectors matches the output from her original code in the WandB sweep.

#### Also note that rotation does not apply if not using SGD optimizer.

#### IMPORT LIBRARIES

#### Import custom libraries 

In [6]:
import os
os.chdir('../')
from pcmf import pcmf_full, path_plot, plot_ordercolor, plot_cluster_assignments
from pcvae import get_weights as get_weights_pcvae
# from pcvae import sparse_D as sparse_D_pcvae


#### Import other libraries

In [7]:
# prerequisites
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image

#### DEFINE DATA LOADER FUNCTIONS

In [21]:
#### DEFINE DATA LOADER FUNCTIONS ####

from torch.utils.data import Dataset, DataLoader 
import numpy as np
from scipy.stats import ortho_group

class synthetic_Dataset(Dataset):
    ''' Custom data class generated from PCMF synthetic data generator.. '''
    def __init__(self, X, X_labels):
        super(synthetic_Dataset,self).__init__()
        self.X = X
        self.X_labels = X_labels

    def __len__(self):
        return len(self.X_labels)
  
    def __getitem__(self, index):
        data = self.X[index]
        data_label = self.X_labels[index]
        return torch.tensor(data, dtype=torch.float32), torch.tensor(data_label, dtype=torch.float32)


class DataGeneratorPPCA(Dataset):

    def __init__(self, dims, hdims, min_sv=0.11, max_sv=5.0, sigma_sq=0.1, deterministic=True, total=10000):
        self.dims = dims
        self.hdims = hdims

        self.eigs = min_sv + (max_sv - min_sv) * np.linspace(0, 1, hdims)
        self.eigvectors = ortho_group.rvs(dims)[:, :hdims]
        self.w = np.matmul(self.eigvectors, np.diag(np.sqrt(self.eigs - sigma_sq)))

        self.sigma_sq = sigma_sq
        self.sigma = np.sqrt(sigma_sq)

        self.total = total
        self.deterministic = deterministic
        if self.deterministic:
            self.z_sample = np.random.normal(size=(total, self.hdims))
            self.x_sample = np.random.normal(np.matmul(self.z_sample, self.w.T), self.sigma).astype(np.float32)

    def __getitem__(self, i):
        if self.deterministic:
            return self.x_sample[i]
        else:
            z_sample = np.random.normal(size=self.hdims)
            return np.random.normal(self.w.dot(z_sample), self.sigma).astype(np.float32)

    def __len__(self):
        # Return a large number for an epoch
        return self.total


class DataGeneratorPCA(Dataset):
    def __init__(self, dims, hdims, min_sv=0.11, max_sv=5.0, total=10000, sv_list=None,
                 load_data=None):
        self.dims = dims
        self.hdims = hdims

        if load_data is None:
            if isinstance(sv_list, list):
                assert len(sv_list) == dims
                self.full_eigs = np.array(sorted(sv_list, reverse=True))
            else:
                self.full_eigs = min_sv + (max_sv - min_sv) * np.linspace(1, 0, dims)
            self.eigs = self.full_eigs[:hdims]

            self.full_svs = np.sqrt(self.full_eigs)

            self.full_eigvectors = ortho_group.rvs(dims)
            self.eigvectors = self.full_eigvectors[:, :hdims]

            self.total = total

            self.full_z_sample = np.random.normal(size=(total, self.dims))
            self.x_sample = (self.full_eigvectors @ np.diag(self.full_svs) @ self.full_z_sample.T).T.astype(np.float32)

        else:
            self.x_sample = load_data
            u, s, vh = np.linalg.svd(self.x_sample.T, full_matrices=False)
            self.eigs = s[:self.hdims]
            self.eigvectors = u[:, :self.hdims]
            self.total = len(self.x_sample)

    def __getitem__(self, i):
        return self.x_sample[i]

    def __len__(self):
        return self.total

    @property
    def shape(self):
        return self.x_sample.shape


#### DEFINE MODEL CLASSES

In [216]:
#### DEFINE MODEL CLASSES ####
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    '''
    Convert a scipy sparse matrix to a torch sparse tensor.
    '''
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

def sparse_D_pcvae(n,p):
    '''
    Construct a sparse matrix, that when applied to a vector containing concatenated vectors
    of coefficients b = [b_1 b_2 ... b_n] where each b_i is p=num_var long and there are
    n = num_vec of them. Differences are taken between conformal elements (e.g. b_11 and b_21)
    across all unique pairwise combinations of vectors.
    '''
    comb_list = list(combinations(range(n),2))
    combs_arr = np.array(comb_list)
    num_combs = combs_arr.shape[0]
    data = np.ones_like(combs_arr)
    data[:,1] *= -1
    row = np.repeat(range(num_combs),2)
    col = combs_arr.flatten()
    D = csr_matrix((data.flatten(), (row, col)), shape=(num_combs, n))
    return sparse_mx_to_torch_sparse_tensor(D)

def convclust_penalty(recons, weights, wasserstein=False, q=2):
    '''
    Computes the differences between all rows of the output.
    :return: (Tensor)
    '''
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    n,p = recons.shape        
    D = sparse_D_pcvae(n,p).to(device)
#         if wasserstein:
#             recons = torch.sort(recons)[0]
#     print('recons',recons.shape, 'weights',weights.shape, 'D',D.shape)
    diffs = torch.norm(D.matmul(recons), q, dim=1)
    return torch.norm(torch.mul(weights, diffs), 1)

import os
import torch
import torch.nn as nn
import numpy as np
from itertools import combinations
from scipy.sparse import csr_matrix
import secrets
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class ModelConfig:
    def __init__(self, model_name, model_type, model_class, input_dim, hidden_dim, init_scale, optim_class, lr,
                 extra_model_args={}, extra_optim_args={}):
        self.model_name = model_name
        self.model_type = model_type
        self.model_class = model_class
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.init_scale = init_scale
        self.extra_model_args = extra_model_args

        self.optim_class = optim_class
        self.lr = lr
        self.extra_optim_args = extra_optim_args
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model_class(input_dim=input_dim, hidden_dim=hidden_dim, init_scale=init_scale, **extra_model_args).to(device)

        self.optimizer = optim_class(self.model.parameters(), lr=lr, **extra_optim_args)

    @property
    def name(self):
        return self.model_name

    @property
    def type(self):
        return self.model_type

    def get_model(self):
        return self.model

    def get_optimizer(self):
        return self.optimizer

class LinearAE(nn.Module):
    def __init__(self,
                 input_dim, hidden_dim, init_scale=0.001,
                 weight_reg_type=None, l2_reg_list=None, cc_lambda=0.0, gauss_coef=1.0, neighbors=None):
        super(LinearAE, self).__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.encoder = nn.Linear(input_dim, hidden_dim, bias=False)
        self.decoder = nn.Linear(hidden_dim, input_dim, bias=False)

        self.weight_reg_type = weight_reg_type
        self.l2_reg_scalar = None
        self.l2_reg_list = l2_reg_list

        self.encoder.weight.data.normal_(0.0, init_scale)
        self.decoder.weight.data.normal_(0.0, init_scale)
        
        self.cc_weights = None
        self.cc_lambda = cc_lambda
        self.gauss_coef = gauss_coef
        self.neighbors = neighbors
        
        # configure regularization parameters

#         assert self.weight_reg_type is None or isinstance(self.l2_reg_list, list), \
#             "l2_reg_list must be a list if weight_reg_type is not None"

# #         assert self.l2_reg_list is None or len(self.l2_reg_list) == hidden_dim, \
#             "Length of l2_reg_list must match latent dimension"

        if weight_reg_type in ("uniform_product", "uniform_sum"):
            self.l2_reg_scalar = l2_reg_list[0] ** 2    # more efficient to use scalar than diag_weights

        elif weight_reg_type == "non_uniform_sum":
            self.reg_weights = torch.tensor(
                np.array(self.l2_reg_list).astype(np.float32)
            )
            self.diag_weights = nn.Parameter(torch.diag(self.reg_weights), requires_grad=False)

    def forward(self, x):
        if self.weight_reg_type is 'convex_cluster':
            if self.cc_weights is None:
                print('Getting convex clustering weights.')
                self.cc_weights = torch.from_numpy(get_weights_pcvae(x.detach().cpu().numpy().reshape(x.shape[0], \
                                                                                     np.prod(x.shape[1::])) , gauss_coef=self.gauss_coef, neighbors=self.neighbors) ).to(device)
                print(self.cc_weights.shape)
        return self.get_reconstruction_loss(x) + self._get_reg_loss(x)

    def compute_trace_norm(self):
        """
        Computes the trace norm of the autoencoder, as well as decoder and encoder individually
        :return: trace_norm(W2W1), trace_norm(W1), trace_norm(W2)
        """
        return torch.matmul(self.decoder.weight, self.encoder.weight).norm(p='nuc'), \
               self.encoder.weight.norm(p='nuc'), \
               self.decoder.weight.norm(p='nuc'),

    def get_reconstruction_loss(self, x):
        z = self.encoder(x)
        recon = self.decoder(z)

        recon_loss = torch.sum((x - recon) ** 2) / len(x)
        return recon_loss

    def get_reg_weights_np(self):
        if self.weight_reg_type is None:
            return np.zeros(self.hidden_dim)
        return np.array(self.l2_reg_list)
    

    def _get_reg_loss(self, x):
        # Standard L2 regularization, applied to W2W1 (product loss)
        if self.weight_reg_type == 'uniform_product':
            return self.l2_reg_scalar * (torch.norm(torch.matmul(self.decoder.weight, self.encoder.weight)) ** 2)

        # Standard L2 regularization for encoder and decoder separately (sum loss)
        elif self.weight_reg_type == 'uniform_sum':
            # regularize both encoder and decoder
            return self.l2_reg_scalar * (torch.norm(self.encoder.weight) ** 2 + torch.norm(self.decoder.weight) ** 2)

        # non-uniform sum
        elif self.weight_reg_type == 'non_uniform_sum':
            return torch.norm(self.diag_weights @ self.encoder.weight) ** 2 \
                   + torch.norm(self.decoder.weight @ self.diag_weights) ** 2

        # Do not apply regularization
        elif self.weight_reg_type is None:
            return 0.0

        # Do not apply regularization
        elif self.weight_reg_type is 'convex_cluster':
            # Eulerian convex clustering loss
            # cc_target needs to be the reconstruction
            # need to get weights
            recon = self.decoder(self.encoder(x)).data.detach().cpu() #.numpy()
            convclust_loss = convclust_penalty(recon, self.cc_weights)
#             print('convclust_loss:',convclust_loss,'cc_lambda*convclust_loss:',self.cc_lambda,self.cc_lambda*convclust_loss)
            return self.cc_lambda*convclust_loss
        
        else:
            raise ValueError("weight_reg_type should be one of (uniform_product, uniform_sum, non_uniform_sum, None)")
            
            
class LinearAENestedDropout(nn.Module):
    def __init__(self,
                 input_dim, hidden_dim, init_scale=0.001, prior_probs=None, use_expectation=False):
        super(LinearAENestedDropout, self).__init__()

        self.use_expectation = use_expectation

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.encoder = nn.Linear(input_dim, hidden_dim, bias=False)
        self.decoder = nn.Linear(hidden_dim, input_dim, bias=False)

        self.encoder.weight.data.normal_(0.0, init_scale)
        self.decoder.weight.data.normal_(0.0, init_scale)

        if prior_probs is None:
            # use geometric distribution
            # p(b) = rho^b (1 - rho) (b = 0 ... k - 2)
            # p(b = k-1) = 1 - sum(p(b), b < k-1)

            self.geom_p = 0.9
            prior_probs = [self.geom_p ** b * (1 - self.geom_p) for b in range(self.hidden_dim - 1)]
            prior_probs.append(1.0 - sum(prior_probs))

        self.prior_probs = torch.tensor(prior_probs)

        cum_probs = [1. - sum(prior_probs[:i]) for i in range(self.hidden_dim)]
        self.cum_probs = torch.tensor(cum_probs)
        self.diag_expected_mask = nn.Parameter(torch.diag(self.cum_probs), requires_grad=False)
        l_expected_mask = np.zeros((self.hidden_dim, self.hidden_dim))
        for i in range(self.hidden_dim):
            l_expected_mask[i, i] = cum_probs[i]
            l_expected_mask[:i, i] = cum_probs[i]
            l_expected_mask[i, :i] = cum_probs[i]
        self.l_expected_mask = nn.Parameter(torch.from_numpy(l_expected_mask).float(), requires_grad=False)

    def forward(self, x):
        if self.use_expectation:
            tr_xtx = torch.norm(x) ** 2
            w1_x = self.encoder(x).T        # (k, n)
            tr_xt_w2_y = torch.trace(w1_x @ x @ self.decoder.weight @ self.diag_expected_mask)
            w2t_w2_masked = (self.decoder.weight.T @ self.decoder.weight) * self.l_expected_mask
            tr_yt_w2t_w2_y = torch.trace(w1_x @ w1_x.T @ w2t_w2_masked)

            recon_loss = (tr_xtx - 2 * tr_xt_w2_y + tr_yt_w2t_w2_y) / len(x)
        else:
            hidden_units = self.encoder(x)
            hidden_units = self._nested_dropout(hidden_units)
            recon = self.decoder(hidden_units)

            recon_loss = torch.sum((x - recon) ** 2) / len(x)
        return recon_loss

    def _nested_dropout(self, hidden_units):
        prior_inds = torch.multinomial(self.prior_probs, len(hidden_units), replacement=True)
        mask = torch.ones_like(hidden_units)
        for hdim_i in range(1, self.hidden_dim):
            drop_row_inds = (prior_inds < hdim_i).float()     # 1 if row is dropped, 0 if kept
            mask[:, hdim_i] = 1 - drop_row_inds     # 1 if kept, 0 if dropped

        masked_hidden_units = hidden_units * mask
        return masked_hidden_units

    def get_reconstruction_loss(self, x):
        hidden_units = self.encoder(x)
        recon = self.decoder(hidden_units)

        recon_loss = torch.sum((x - recon) ** 2) / len(x)
        return recon_loss
    


#### DEFINE MODEL TRAINING FUNCTION train_models

In [217]:
#### DEFINE MODEL TRAINING FUNCTION train_models ####

import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train_models(data_loader, train_itr, metrics_dict, model_configs, eval_metrics_list=None):
    for train_i in range(train_itr):
        for x in data_loader:
            x_cuda = x.to(device)

            # ---- Optimize ----
            losses = {}

            model = model_config.get_model()
            optimizer = model_config.get_optimizer()

            optimizer.zero_grad()

            loss = model(x_cuda)

            loss.backward()
            
            if model_config.type == 'rotation':
                # Rotation Augmented Gradient (RAG) 
                y = model.encoder.weight @ x_cuda.T
                yy_t_norm = y @ y.T / float(len(x))
                yy_t_upper = yy_t_norm - yy_t_norm.tril()
                gamma = 0.5 * (yy_t_upper - yy_t_upper.T)

                model.encoder.weight.grad -= gamma @ model.encoder.weight
                model.decoder.weight.grad -= model.decoder.weight @ gamma.T

            optimizer.step()

            losses[model_config.name] = loss.item()

        # ---- Log statistics ----
        if train_i == 0 or (train_i + 1) % 10 == 0:
            print("".join(["Iteration = {}, Losses: ".format(train_i + 1)]
                          + ["{} = {} ".format(key, val) for key, val in losses.items()]))
                
    return model


In [218]:
def train_models_path(X_train, train_loader, model_config, lambda_path,
                   epochs_per_lambda=5, neighbors=None):
    ''' Train linear autoencoder (not setup currently for nested AE)'''
    embeddings_list = []
    encoder_weights_list = []
    decoder_weights_list = []
    reconstructions_list = []
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    for i, lambd in enumerate(lambda_path):         
        # Update convex clustering penalty
        model_config.get_model().cc_lambda = lambd
        # Train model over epochs_per_lambda iterations
        train_models(data_loader=train_loader, train_itr=epochs_per_lambda, model_configs=model_config, metrics_dict=None, eval_metrics_list=None)
        
        # Save encoder/decoder weights, encoded embeddings, and decoded reconstructions
        model = model_config.get_model()
        
        encoder_weights_list.append(model.encoder.weight.data.detach().cpu().numpy())
        decoder_weights_list.append(model.encoder.weight.data.detach().cpu().numpy())
        embeddings_list.append(model.encoder(torch.tensor(X_train, dtype=torch.float32).to(device)).detach().cpu().numpy())
        reconstructions_list.append(model.forward(torch.tensor(X_train, dtype=torch.float32))[0].data.detach().cpu().numpy())

    return encoder_weights_list, decoder_weights_list, embeddings_list, reconstructions_list, model


#### DEFINE EVALUATION METRICS

In [219]:
import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def get_weight_tensor_from_seq(weight_seq):
    if isinstance(weight_seq, nn.Linear):
        return weight_seq.weight.detach()
    elif isinstance(weight_seq, nn.Sequential):
        weight_tensor = None
        for layer in weight_seq:
            if isinstance(layer, nn.Linear):
                layer_weight = layer.weight.detach()
                if weight_tensor is None:
                    weight_tensor = layer_weight
                else:
                    weight_tensor = layer_weight @ weight_tensor
            elif isinstance(layer, nn.BatchNorm1d):
                bn_weight = layer.weight.detach()

                # ignore bias

                if weight_tensor is None:
                    weight_tensor = torch.diag(bn_weight)
                else:
                    weight_tensor = torch.diag(bn_weight) @ weight_tensor
            else:
                raise ValueError("Layer type {} not supported!".format(type(layer)))
        return weight_tensor


def metric_transpose_theorem(model):
    """
    Metric for how close encoder and decoder.T are
    :param model: LinearAE model
    :return: ||W1 - W2^T||_F^2 / hidden_dim
    """
    encoder_weight = get_weight_tensor_from_seq(model.encoder)
    decoder_weight = get_weight_tensor_from_seq(model.decoder)

    transpose_metric = torch.norm(encoder_weight - decoder_weight.T) ** 2
    return transpose_metric.item() / float(model.hidden_dim)


def metric_alignment(model, gt_eigvectors):
    """
    Metric for alignment of decoder columns to ground truth eigenvectors
    :param model: Linear AE model
    :param gt_eigvectors: ground truth eigenvectors (input_dims,hidden_dims)
    :return: sum_i (1 - max_j (cos(eigvector_i, normalized_decoder column_j)))
    """
    decoder_weight = get_weight_tensor_from_seq(model.decoder)
    decoder_np = decoder_weight.detach().cpu().numpy()

    # normalize columns of gt_eigvectors
    norm_gt_eigvectors = gt_eigvectors / np.linalg.norm(gt_eigvectors, axis=0)
    # normalize columns of decoder
    norm_decoder = decoder_np / (np.linalg.norm(decoder_np, axis=0) + 1e-8)

    total_angles = 0.0
    for eig_i in range(gt_eigvectors.shape[1]):
        eigvector = norm_gt_eigvectors[:, eig_i]
        total_angles += 1. - np.max(np.abs(norm_decoder.T @ eigvector)) ** 2

    return total_angles / float(model.hidden_dim)


def metric_subspace(model, gt_eigvectors, gt_eigs):
    decoder_weight = get_weight_tensor_from_seq(model.decoder)
    decoder_np = decoder_weight.detach().cpu().numpy()

    # k - tr(UU^T WW^T), where W is left singular vector matrix of decoder
    u, s, vh = np.linalg.svd(decoder_np, full_matrices=False)
    return 1 - np.trace(gt_eigvectors @ gt_eigvectors.T @ u @ u.T) / float(model.hidden_dim)


def metric_loss(model, data_loader):
    """
    Measures the full batch loss
    :param model: a linear (variational) AE model
    :param data_loader: full batch data loader. Should be different from the training data loader, if in minibatch mode
    """
    loss = None
    for x in data_loader:
        loss = model(x.to(device)).item()
    return loss


def metric_recon_loss(model, data_loader):
    recon_loss = None
    for x in data_loader:
        recon_loss = model.get_reconstruction_loss(x.to(device)).item()
    return recon_loss


### TRAING A MODEL (TESTING CONVEX CLUSTERING) 

In [220]:
##### GET DATA ####
import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

seed=1234
# set random seed
np.random.seed(seed)
torch.manual_seed(seed)

input_dim = 100
hidden_dim = 5

n_data = 50
batch_size = n_data

max_sv = float(input_dim) * 0.1
min_sv = 1.0
sigma = 0.5

gt_data = DataGeneratorPCA(input_dim, hidden_dim, min_sv=min_sv, max_sv=max_sv, total=n_data)
data = DataGeneratorPCA(input_dim, hidden_dim, load_data=gt_data.x_sample)

loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=False)


In [221]:
#### Define the model ####

import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#### DEFINE MODEL #####
model_dict = dict(
    model_name='rotation',
    model_type='rotation',
    model_class=LinearAE,
    extra_model_args = {'weight_reg_type':'convex_cluster', 'gauss_coef':1.0, 'neighbors':None},
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    init_scale=0.0001,
    optim_class=torch.optim.SGD,
    extra_optim_args={'momentum': 0.9, 'nesterov': True},
    lr=0.0001,
    train_itr=1000,
    seed=seed
)


# model config contains the model 
model_config = ModelConfig(
        model_name=model_dict['model_name'],
        model_type=model_dict['model_type'],
        model_class=model_dict['model_class'],
        input_dim=model_dict['input_dim'], 
        hidden_dim=model_dict['hidden_dim'],
        init_scale=model_dict['init_scale'],
        extra_model_args=model_dict['extra_model_args'],
        optim_class=model_dict['optim_class'],
        lr=model_dict['lr'],
        extra_optim_args=model_dict['extra_optim_args']
    )

print(model_dict,'\n')
print(model_config.get_model(),'\n')
print(model_config.get_optimizer())

print('Transpose:', metric_transpose_theorem(model_config.get_model()),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(model_config.get_model(), data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(model_config.get_model(), data.eigvectors, data.eigs),'\n')



{'model_name': 'rotation', 'model_type': 'rotation', 'model_class': <class '__main__.LinearAE'>, 'extra_model_args': {'weight_reg_type': 'convex_cluster', 'gauss_coef': 1.0, 'neighbors': None}, 'input_dim': 100, 'hidden_dim': 5, 'init_scale': 0.0001, 'optim_class': <class 'torch.optim.sgd.SGD'>, 'extra_optim_args': {'momentum': 0.9, 'nesterov': True}, 'lr': 0.0001, 'train_itr': 1000, 'seed': 1234} 

LinearAE(
  (encoder): Linear(in_features=100, out_features=5, bias=False)
  (decoder): Linear(in_features=5, out_features=100, bias=False)
) 

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.0001
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0
)
Transpose: 2.0780660634045487e-06 

Distance to axis-aligned solution: 0.9531327848446475 

Distance to optimal subspace): 0.9347729325294495 



In [226]:
#### Define the model ####

import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#### DEFINE MODEL #####
model_dict = dict(
    model_name='rotation',
    model_type='rotation',
    model_class=LinearAE,
    extra_model_args = {'weight_reg_type':None},
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    init_scale=0.0001,
    optim_class=torch.optim.SGD,
    extra_optim_args={'momentum': 0.9, 'nesterov': True},
    lr=0.0001,
    train_itr=1000,
    seed=seed
)


# model config contains the model 
model_config = ModelConfig(
        model_name=model_dict['model_name'],
        model_type=model_dict['model_type'],
        model_class=model_dict['model_class'],
        input_dim=model_dict['input_dim'], 
        hidden_dim=model_dict['hidden_dim'],
        init_scale=model_dict['init_scale'],
        extra_model_args=model_dict['extra_model_args'],
        optim_class=model_dict['optim_class'],
        lr=model_dict['lr'],
        extra_optim_args=model_dict['extra_optim_args']
    )

print(model_dict,'\n')
print(model_config.get_model(),'\n')
print(model_config.get_optimizer())

print('Transpose:', metric_transpose_theorem(model_config.get_model()),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(model_config.get_model(), data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(model_config.get_model(), data.eigvectors, data.eigs),'\n')


modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')

trained_model = train_models(data_loader=loader, train_itr=3000, metrics_dict=None, model_configs=model_config)
# trained_model = train_models(data_loader=loader, train_itr=model_dict['train_itr'], metrics_dict=None, model_configs=model_config)

modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')



{'model_name': 'rotation', 'model_type': 'rotation', 'model_class': <class '__main__.LinearAE'>, 'extra_model_args': {'weight_reg_type': None}, 'input_dim': 100, 'hidden_dim': 5, 'init_scale': 0.0001, 'optim_class': <class 'torch.optim.sgd.SGD'>, 'extra_optim_args': {'momentum': 0.9, 'nesterov': True}, 'lr': 0.0001, 'train_itr': 1000, 'seed': 1234} 

LinearAE(
  (encoder): Linear(in_features=100, out_features=5, bias=False)
  (decoder): Linear(in_features=5, out_features=100, bias=False)
) 

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.0001
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0
)
Transpose: 2.064951513602864e-06 

Distance to axis-aligned solution: 0.9740111924670842 

Distance to optimal subspace): 0.9572605818510056 

Reconstrution Loss: 555.1112670898438 

Loss: 555.1112670898438 

Transpose: 2.064951513602864e-06 

Distance to axis-aligned solution: 0.9740111924670842 

Distance to optimal subspace): 0.9572605818510056 

Iteration = 1360, Losses: rotation = 401.7996826171875 
Iteration = 1370, Losses: rotation = 401.7763977050781 
Iteration = 1380, Losses: rotation = 401.7528076171875 
Iteration = 1390, Losses: rotation = 401.7289123535156 
Iteration = 1400, Losses: rotation = 401.7047119140625 
Iteration = 1410, Losses: rotation = 401.6802978515625 
Iteration = 1420, Losses: rotation = 401.6556396484375 
Iteration = 1430, Losses: rotation = 401.6307067871094 
Iteration = 1440, Losses: rotation = 401.60565185546875 
Iteration = 1450, Losses: rotation = 401.58038330078125 
Iteration = 1460, Losses: rotation = 401.55499267578125 
Iteration = 1470, Losses: rotation = 401.5294494628906 
Iteration = 1480, Losses: rotation = 401.5038146972656 
Iteration = 1490, Losses: rotation = 401.4781494140625 
Iteration = 1500, Losses: rotation = 401.4524230957031 
Iteration = 1510, Losses: rotation = 401.4266357421875 
Iteration = 1520, Losses: rotation = 401.40093994140625 
Iteration = 1530, Losses: rotation = 401.375

Iteration = 2930, Losses: rotation = 400.3013000488281 
Iteration = 2940, Losses: rotation = 400.3011779785156 
Iteration = 2950, Losses: rotation = 400.301025390625 
Iteration = 2960, Losses: rotation = 400.3009033203125 
Iteration = 2970, Losses: rotation = 400.3006896972656 
Iteration = 2980, Losses: rotation = 400.30059814453125 
Iteration = 2990, Losses: rotation = 400.3004150390625 
Iteration = 3000, Losses: rotation = 400.3003234863281 
Reconstrution Loss: 400.3003234863281 

Loss: 400.3003234863281 

Transpose: 2.2936279492569157e-06 

Distance to axis-aligned solution: 0.19753874445063957 

Distance to optimal subspace): 0.00022840499877929688 



In [225]:
#### Define the model ####

import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#### DEFINE MODEL #####
model_dict = dict(
    model_name='rotation',
    model_type='rotation',
    model_class=LinearAE,
    extra_model_args = {'weight_reg_type':'convex_cluster', 'gauss_coef':1.0, 'neighbors':None},
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    init_scale=0.0001,
    optim_class=torch.optim.SGD,
    extra_optim_args={'momentum': 0.9, 'nesterov': True},
    lr=0.0001,
    train_itr=1000,
    seed=seed
)


# model config contains the model 
model_config = ModelConfig(
        model_name=model_dict['model_name'],
        model_type=model_dict['model_type'],
        model_class=model_dict['model_class'],
        input_dim=model_dict['input_dim'], 
        hidden_dim=model_dict['hidden_dim'],
        init_scale=model_dict['init_scale'],
        extra_model_args=model_dict['extra_model_args'],
        optim_class=model_dict['optim_class'],
        lr=model_dict['lr'],
        extra_optim_args=model_dict['extra_optim_args']
    )

print(model_dict,'\n')
print(model_config.get_model(),'\n')
print(model_config.get_optimizer())

print('Transpose:', metric_transpose_theorem(model_config.get_model()),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(model_config.get_model(), data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(model_config.get_model(), data.eigvectors, data.eigs),'\n')



modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')

model_config.get_model().cc_lambda=10.0
trained_model = train_models(data_loader=loader, train_itr=3000, metrics_dict=None, model_configs=model_config)
# trained_model = train_models(data_loader=loader, train_itr=model_dict['train_itr'], metrics_dict=None, model_configs=model_config)

modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')



{'model_name': 'rotation', 'model_type': 'rotation', 'model_class': <class '__main__.LinearAE'>, 'extra_model_args': {'weight_reg_type': 'convex_cluster', 'gauss_coef': 1.0, 'neighbors': None}, 'input_dim': 100, 'hidden_dim': 5, 'init_scale': 0.0001, 'optim_class': <class 'torch.optim.sgd.SGD'>, 'extra_optim_args': {'momentum': 0.9, 'nesterov': True}, 'lr': 0.0001, 'train_itr': 1000, 'seed': 1234} 

LinearAE(
  (encoder): Linear(in_features=100, out_features=5, bias=False)
  (decoder): Linear(in_features=5, out_features=100, bias=False)
) 

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.0001
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0
)
Transpose: 2.0612884327420034e-06 

Distance to axis-aligned solution: 0.9811180198675457 

Distance to optimal subspace): 0.9638129651546479 

Reconstrution Loss: 555.1112670898438 

Getting convex clustering weights.
torch.Size([1225])
Loss: 555.1112670898438 

Transpose: 2.0612884327420034e-06 



Iteration = 1270, Losses: rotation = 130794.42448369254 
Iteration = 1280, Losses: rotation = 130796.05611676889 
Iteration = 1290, Losses: rotation = 130797.59891847608 
Iteration = 1300, Losses: rotation = 130799.06280713933 
Iteration = 1310, Losses: rotation = 130800.44362436509 
Iteration = 1320, Losses: rotation = 130801.74777797647 
Iteration = 1330, Losses: rotation = 130802.9827922427 
Iteration = 1340, Losses: rotation = 130804.1552740793 
Iteration = 1350, Losses: rotation = 130805.25535717247 
Iteration = 1360, Losses: rotation = 130806.30052248527 
Iteration = 1370, Losses: rotation = 130807.2849150665 
Iteration = 1380, Losses: rotation = 130808.2120794023 
Iteration = 1390, Losses: rotation = 130809.0939304011 
Iteration = 1400, Losses: rotation = 130809.92480465987 
Iteration = 1410, Losses: rotation = 130810.70993593833 
Iteration = 1420, Losses: rotation = 130811.44589218208 
Iteration = 1430, Losses: rotation = 130812.1485749681 
Iteration = 1440, Losses: rotation = 

Iteration = 2740, Losses: rotation = 130818.40615716681 
Iteration = 2750, Losses: rotation = 130818.38263900731 
Iteration = 2760, Losses: rotation = 130818.36154983887 
Iteration = 2770, Losses: rotation = 130818.33832373712 
Iteration = 2780, Losses: rotation = 130818.31887674086 
Iteration = 2790, Losses: rotation = 130818.30109828427 
Iteration = 2800, Losses: rotation = 130818.28359731312 
Iteration = 2810, Losses: rotation = 130818.2617930623 
Iteration = 2820, Losses: rotation = 130818.24811637467 
Iteration = 2830, Losses: rotation = 130818.22806080291 
Iteration = 2840, Losses: rotation = 130818.21135160892 
Iteration = 2850, Losses: rotation = 130818.19624752119 
Iteration = 2860, Losses: rotation = 130818.17802910373 
Iteration = 2870, Losses: rotation = 130818.15942187478 
Iteration = 2880, Losses: rotation = 130818.1435739379 
Iteration = 2890, Losses: rotation = 130818.12473854085 
Iteration = 2900, Losses: rotation = 130818.11638031172 
Iteration = 2910, Losses: rotatio

### TRAIN A MODEL

####  Model #1: Get the data and define the model - rotation with 400 hidden_dim

In [7]:
##### GET DATA ####
import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

seed=1234
# set random seed
np.random.seed(seed)
torch.manual_seed(seed)

input_dim = 1000
hidden_dim = 400

n_data = 5000
batch_size = n_data

max_sv = float(input_dim) * 0.1
min_sv = 1.0
sigma = 0.5

gt_data = DataGeneratorPCA(input_dim, hidden_dim, min_sv=min_sv, max_sv=max_sv, total=n_data)
data = DataGeneratorPCA(input_dim, hidden_dim, load_data=gt_data.x_sample)

loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=False)

#### Define the model ####

import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#### DEFINE MODEL #####
model_dict = dict(
    model_name='rotation',
    model_type='rotation',
    model_class=LinearAE,
    extra_model_args = {"weight_reg_type": None},
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    init_scale=0.0001,
    optim_class=torch.optim.SGD,
    extra_optim_args={'momentum': 0.9, 'nesterov': True},
    lr=0.0001,
#     optim_class=torch.optim.Adam,
#     extra_optim_args={},
#     lr=0.0003,
    train_itr=1000,#50000,
    seed=seed
)

# model config contains the model 
model_config = ModelConfig(
        model_name=model_dict['model_name'],
        model_type=model_dict['model_type'],
        model_class=model_dict['model_class'],
        input_dim=model_dict['input_dim'], 
        hidden_dim=model_dict['hidden_dim'],
        init_scale=model_dict['init_scale'],
        extra_model_args=model_dict['extra_model_args'],
        optim_class=model_dict['optim_class'],
        lr=model_dict['lr'],
        extra_optim_args=model_dict['extra_optim_args']
    )

print(model_dict,'\n')
print(model_config.get_model(),'\n')
print(model_config.get_optimizer())

print('Transpose:', metric_transpose_theorem(model_config.get_model()),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(model_config.get_model(), data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(model_config.get_model(), data.eigvectors, data.eigs),'\n')



{'model_name': 'rotation', 'model_type': 'rotation', 'model_class': <class '__main__.LinearAE'>, 'extra_model_args': {'weight_reg_type': None}, 'input_dim': 1000, 'hidden_dim': 400, 'init_scale': 0.0001, 'optim_class': <class 'torch.optim.sgd.SGD'>, 'extra_optim_args': {'momentum': 0.9, 'nesterov': True}, 'lr': 0.0001, 'train_itr': 1000, 'seed': 1234} 

LinearAE(
  (encoder): Linear(in_features=1000, out_features=400, bias=False)
  (decoder): Linear(in_features=400, out_features=1000, bias=False)
) 

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.0001
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0
)
Transpose: 1.9994410686194896e-05 

Distance to axis-aligned solution: 0.9897644591560325 

Distance to optimal subspace): 0.6001819992065429 



#### Model #1: Run the model - rotation with 400 hidden_dim

In [13]:
modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')

trained_model = train_models(data_loader=loader, train_itr=model_dict['train_itr'], metrics_dict=None, model_configs=model_config)

modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')



Reconstrution Loss: 47806.57421875 

Loss: 49319.0234375 

Transpose: 0.003536132350564003 

Distance to axis-aligned solution: 0.09904575485510528 

Distance to optimal subspace): 3.8146972658470446e-07 

Iteration = 1, Losses: nd_expectation = 49319.0234375 
Iteration = 10, Losses: nd_expectation = 49319.01953125 
Iteration = 20, Losses: nd_expectation = 49319.01953125 
Iteration = 30, Losses: nd_expectation = 49319.0234375 
Iteration = 40, Losses: nd_expectation = 49319.01953125 
Iteration = 50, Losses: nd_expectation = 49319.0234375 
Iteration = 60, Losses: nd_expectation = 49319.01953125 
Iteration = 70, Losses: nd_expectation = 49319.01953125 
Iteration = 80, Losses: nd_expectation = 49319.01953125 
Iteration = 90, Losses: nd_expectation = 49319.01953125 
Iteration = 100, Losses: nd_expectation = 49319.01953125 
Iteration = 110, Losses: nd_expectation = 49319.01953125 
Iteration = 120, Losses: nd_expectation = 49319.01953125 
Iteration = 130, Losses: nd_expectation = 49319.019531

Iteration = 1390, Losses: nd_expectation = 49319.03515625 
Iteration = 1400, Losses: nd_expectation = 49319.03515625 
Iteration = 1410, Losses: nd_expectation = 49319.03125 
Iteration = 1420, Losses: nd_expectation = 49319.02734375 
Iteration = 1430, Losses: nd_expectation = 49319.0390625 
Iteration = 1440, Losses: nd_expectation = 49319.046875 
Iteration = 1450, Losses: nd_expectation = 49319.03125 
Iteration = 1460, Losses: nd_expectation = 49319.03125 
Iteration = 1470, Losses: nd_expectation = 49319.03515625 
Iteration = 1480, Losses: nd_expectation = 49319.03515625 
Iteration = 1490, Losses: nd_expectation = 49319.03515625 
Iteration = 1500, Losses: nd_expectation = 49319.03515625 
Iteration = 1510, Losses: nd_expectation = 49319.03515625 
Iteration = 1520, Losses: nd_expectation = 49319.03515625 
Iteration = 1530, Losses: nd_expectation = 49319.03125 
Iteration = 1540, Losses: nd_expectation = 49319.03515625 
Iteration = 1550, Losses: nd_expectation = 49319.0390625 
Iteration = 1

Iteration = 2800, Losses: nd_expectation = 49319.03515625 
Iteration = 2810, Losses: nd_expectation = 49319.03515625 
Iteration = 2820, Losses: nd_expectation = 49319.04296875 
Iteration = 2830, Losses: nd_expectation = 49319.03515625 
Iteration = 2840, Losses: nd_expectation = 49319.04296875 
Iteration = 2850, Losses: nd_expectation = 49319.04296875 
Iteration = 2860, Losses: nd_expectation = 49319.03515625 
Iteration = 2870, Losses: nd_expectation = 49319.03515625 
Iteration = 2880, Losses: nd_expectation = 49319.03515625 
Iteration = 2890, Losses: nd_expectation = 49319.03515625 
Iteration = 2900, Losses: nd_expectation = 49319.03515625 
Iteration = 2910, Losses: nd_expectation = 49319.04296875 
Iteration = 2920, Losses: nd_expectation = 49319.046875 
Iteration = 2930, Losses: nd_expectation = 49319.03125 
Iteration = 2940, Losses: nd_expectation = 49319.03515625 
Iteration = 2950, Losses: nd_expectation = 49319.046875 
Iteration = 2960, Losses: nd_expectation = 49319.04296875 
Iter

#### Model #2: Get the data and define the model - rotation with 50 hidden_dim

In [9]:
##### GET DATA ####
import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

seed=1234
# set random seed
np.random.seed(seed)
torch.manual_seed(seed)

input_dim = 1000
hidden_dim = 50

n_data = 5000
batch_size = n_data

max_sv = float(input_dim) * 0.1
min_sv = 1.0
sigma = 0.5

gt_data = DataGeneratorPCA(input_dim, hidden_dim, min_sv=min_sv, max_sv=max_sv, total=n_data)
data = DataGeneratorPCA(input_dim, hidden_dim, load_data=gt_data.x_sample)

loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=False)

#### Define the model ####

import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#### DEFINE MODEL #####
model_dict = dict(
    model_name='rotation',
    model_type='rotation',
    model_class=LinearAE,
    extra_model_args = {"weight_reg_type": None},
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    init_scale=0.0001,
    optim_class=torch.optim.SGD,
    extra_optim_args={'momentum': 0.9, 'nesterov': True},
    lr=0.0001,
#     optim_class=torch.optim.Adam,
#     extra_optim_args={},
#     lr=0.0003,
    train_itr=1000,#50000,
    seed=seed
)

# model config contains the model 
model_config = ModelConfig(
        model_name=model_dict['model_name'],
        model_type=model_dict['model_type'],
        model_class=model_dict['model_class'],
        input_dim=model_dict['input_dim'], 
        hidden_dim=model_dict['hidden_dim'],
        init_scale=model_dict['init_scale'],
        extra_model_args=model_dict['extra_model_args'],
        optim_class=model_dict['optim_class'],
        lr=model_dict['lr'],
        extra_optim_args=model_dict['extra_optim_args']
    )

print(model_dict,'\n')
print(model_config.get_model(),'\n')
print(model_config.get_optimizer())

print('Transpose:', metric_transpose_theorem(model_config.get_model()),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(model_config.get_model(), data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(model_config.get_model(), data.eigvectors, data.eigs),'\n')



{'model_name': 'rotation', 'model_type': 'rotation', 'model_class': <class '__main__.LinearAE'>, 'extra_model_args': {'weight_reg_type': None}, 'input_dim': 1000, 'hidden_dim': 50, 'init_scale': 0.0001, 'optim_class': <class 'torch.optim.sgd.SGD'>, 'extra_optim_args': {'momentum': 0.9, 'nesterov': True}, 'lr': 0.0001, 'train_itr': 1000, 'seed': 1234} 

LinearAE(
  (encoder): Linear(in_features=1000, out_features=50, bias=False)
  (decoder): Linear(in_features=50, out_features=1000, bias=False)
) 

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.0001
    maximize: False
    momentum: 0.9
    nesterov: True
    weight_decay: 0
)
Transpose: 1.9993053283542393e-05 

Distance to axis-aligned solution: 0.993878737828118 

Distance to optimal subspace): 0.9511795520782471 



#### Model #2: Run the model - rotation with 50 hidden_dim

In [10]:
modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')

trained_model = train_models(data_loader=loader, train_itr=1000, metrics_dict=None, model_configs=model_config)

modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')



Reconstrution Loss: 50555.9375 

Loss: 50555.9375 

Transpose: 1.9993053283542393e-05 

Distance to axis-aligned solution: 0.993878737828118 

Distance to optimal subspace): 0.9511795520782471 

Iteration = 1, Losses: rotation = 50555.9375 
Iteration = 10, Losses: rotation = 50555.86328125 
Iteration = 20, Losses: rotation = 50555.3125 
Iteration = 30, Losses: rotation = 50549.6328125 
Iteration = 40, Losses: rotation = 50485.734375 
Iteration = 50, Losses: rotation = 49784.1875 
Iteration = 60, Losses: rotation = 46384.1953125 
Iteration = 70, Losses: rotation = 44725.10546875 
Iteration = 80, Losses: rotation = 44449.55859375 
Iteration = 90, Losses: rotation = 44315.09765625 
Iteration = 100, Losses: rotation = 44242.43359375 
Iteration = 110, Losses: rotation = 44198.40234375 
Iteration = 120, Losses: rotation = 44166.01171875 
Iteration = 130, Losses: rotation = 44141.96875 
Iteration = 140, Losses: rotation = 44123.3984375 
Iteration = 150, Losses: rotation = 44108.734375 
Iterat

#### Model #3: Get the data and define the model - nested dropout with 40 hidden_dim

In [11]:
##### GET DATA ####
import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

seed=1234
# set random seed
np.random.seed(seed)
torch.manual_seed(seed)

input_dim = 1000
hidden_dim = 20

n_data = 5000
batch_size = n_data

max_sv = float(input_dim) * 0.1
min_sv = 1.0
sigma = 0.5

gt_data = DataGeneratorPCA(input_dim, hidden_dim, min_sv=min_sv, max_sv=max_sv, total=n_data)
data = DataGeneratorPCA(input_dim, hidden_dim, load_data=gt_data.x_sample)

loader = torch.utils.data.DataLoader(data, batch_size=batch_size, shuffle=False)

#### Define the model ####

import os
import torch
import numpy as np
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


#### DEFINE MODEL #####
model_dict = dict(
    model_name='nd_expectation',
    model_type='nested_dropout',
    model_class=LinearAENestedDropout,
    extra_model_args = {'use_expectation': True},
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    init_scale=0.0001,
    optim_class=torch.optim.Adam,
    extra_optim_args={},
    lr=0.003,
    train_itr=3000,#50000,
    seed=seed
)

# model config contains the model 
model_config = ModelConfig(
        model_name=model_dict['model_name'],
        model_type=model_dict['model_type'],
        model_class=model_dict['model_class'],
        input_dim=model_dict['input_dim'], 
        hidden_dim=model_dict['hidden_dim'],
        init_scale=model_dict['init_scale'],
        extra_model_args=model_dict['extra_model_args'],
        optim_class=model_dict['optim_class'],
        lr=model_dict['lr'],
        extra_optim_args=model_dict['extra_optim_args']
    )

print(model_dict,'\n')
print(model_config.get_model(),'\n')
print(model_config.get_optimizer())

print('Transpose:', metric_transpose_theorem(model_config.get_model()),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(model_config.get_model(), data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(model_config.get_model(), data.eigvectors, data.eigs),'\n')





{'model_name': 'nd_expectation', 'model_type': 'nested_dropout', 'model_class': <class '__main__.LinearAENestedDropout'>, 'extra_model_args': {'use_expectation': True}, 'input_dim': 1000, 'hidden_dim': 20, 'init_scale': 0.0001, 'optim_class': <class 'torch.optim.adam.Adam'>, 'extra_optim_args': {}, 'lr': 0.003, 'train_itr': 3000, 'seed': 1234} 

LinearAENestedDropout(
  (encoder): Linear(in_features=1000, out_features=20, bias=False)
  (decoder): Linear(in_features=20, out_features=1000, bias=False)
) 

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    eps: 1e-08
    foreach: None
    lr: 0.003
    maximize: False
    weight_decay: 0
)
Transpose: 2.013526827795431e-05 

Distance to axis-aligned solution: 0.9951107563416652 

Distance to optimal subspace): 0.9793644726276398 



#### Model #3: Run the model - nested dropout with 40 hidden_dim

In [12]:
modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')

trained_model = train_models(data_loader=loader, train_itr=model_dict['train_itr'], metrics_dict=None, model_configs=model_config)

modelIn = model_config.get_model()
print('Reconstrution Loss:', metric_recon_loss(modelIn, loader),'\n') # full batch loss
print('Loss:', metric_loss(modelIn, loader),'\n')
print('Transpose:', metric_transpose_theorem(modelIn),'\n') # how close encoder and decoder.T are 
print('Distance to axis-aligned solution:', metric_alignment(modelIn, data.eigvectors),'\n') # alignment of decoder columns to ground truth eigenvectors
print('Distance to optimal subspace):', metric_subspace(modelIn, data.eigvectors, data.eigs),'\n')



Reconstrution Loss: 50555.9375 

Loss: 50544.8828125 

Transpose: 2.013526827795431e-05 

Distance to axis-aligned solution: 0.9951107563416652 

Distance to optimal subspace): 0.9793644726276398 

Iteration = 1, Losses: nd_expectation = 50544.8828125 
Iteration = 10, Losses: nd_expectation = 50005.375 
Iteration = 20, Losses: nd_expectation = 49659.06640625 
Iteration = 30, Losses: nd_expectation = 49504.6328125 
Iteration = 40, Losses: nd_expectation = 49428.1875 
Iteration = 50, Losses: nd_expectation = 49391.37109375 
Iteration = 60, Losses: nd_expectation = 49371.125 
Iteration = 70, Losses: nd_expectation = 49358.13671875 
Iteration = 80, Losses: nd_expectation = 49350.11328125 
Iteration = 90, Losses: nd_expectation = 49344.70703125 
Iteration = 100, Losses: nd_expectation = 49340.89453125 
Iteration = 110, Losses: nd_expectation = 49338.078125 
Iteration = 120, Losses: nd_expectation = 49335.91796875 
Iteration = 130, Losses: nd_expectation = 49334.1875 
Iteration = 140, Losses

Iteration = 1400, Losses: nd_expectation = 49319.25390625 
Iteration = 1410, Losses: nd_expectation = 49319.24609375 
Iteration = 1420, Losses: nd_expectation = 49319.24609375 
Iteration = 1430, Losses: nd_expectation = 49319.23828125 
Iteration = 1440, Losses: nd_expectation = 49319.234375 
Iteration = 1450, Losses: nd_expectation = 49319.23046875 
Iteration = 1460, Losses: nd_expectation = 49319.22265625 
Iteration = 1470, Losses: nd_expectation = 49319.21875 
Iteration = 1480, Losses: nd_expectation = 49319.21484375 
Iteration = 1490, Losses: nd_expectation = 49319.2109375 
Iteration = 1500, Losses: nd_expectation = 49319.2109375 
Iteration = 1510, Losses: nd_expectation = 49319.203125 
Iteration = 1520, Losses: nd_expectation = 49319.19921875 
Iteration = 1530, Losses: nd_expectation = 49319.1953125 
Iteration = 1540, Losses: nd_expectation = 49319.19140625 
Iteration = 1550, Losses: nd_expectation = 49319.1875 
Iteration = 1560, Losses: nd_expectation = 49319.1875 
Iteration = 157

Iteration = 2820, Losses: nd_expectation = 49319.02734375 
Iteration = 2830, Losses: nd_expectation = 49319.02734375 
Iteration = 2840, Losses: nd_expectation = 49319.02734375 
Iteration = 2850, Losses: nd_expectation = 49319.02734375 
Iteration = 2860, Losses: nd_expectation = 49319.0234375 
Iteration = 2870, Losses: nd_expectation = 49319.0234375 
Iteration = 2880, Losses: nd_expectation = 49319.02734375 
Iteration = 2890, Losses: nd_expectation = 49319.0234375 
Iteration = 2900, Losses: nd_expectation = 49319.02734375 
Iteration = 2910, Losses: nd_expectation = 49319.0234375 
Iteration = 2920, Losses: nd_expectation = 49319.0234375 
Iteration = 2930, Losses: nd_expectation = 49319.01953125 
Iteration = 2940, Losses: nd_expectation = 49319.0234375 
Iteration = 2950, Losses: nd_expectation = 49319.01953125 
Iteration = 2960, Losses: nd_expectation = 49319.0234375 
Iteration = 2970, Losses: nd_expectation = 49319.0234375 
Iteration = 2980, Losses: nd_expectation = 49319.0234375 
Iterat