In [1]:
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from os.path import join as oj
import sys
sys.path.append('../vision')
import numpy as np
from copy import deepcopy
import pickle as pkl
from torch.optim.lr_scheduler import StepLR
from sklearn.decomposition import PCA
from sklearn.metrics import pairwise

import models
from dim_reduction import *
%load_ext autoreload
%autoreload 2

# reduce model by projecting onto pcs that explain "percent_to_explain"
def reduce_model(model, percent_to_explain=0.85):
    """Return a copy of ``model`` whose weight matrices are low-rank PCA reconstructions.

    For every state-dict entry whose name contains ``'weight'`` and that is a
    2-D matrix, PCA is fitted on the matrix (rows are treated as samples), the
    smallest number of leading components whose cumulative explained-variance
    ratio strictly exceeds ``percent_to_explain`` is selected, and the matrix
    is replaced by its projection onto those components mapped back to the
    original space. All other entries are copied unchanged.

    Parameters
    ----------
    model : object supporting ``deepcopy``, ``state_dict()`` and ``load_state_dict()``
        The model to reduce. It is not modified.
    percent_to_explain : float, default 0.85
        Fraction of variance the kept components must exceed. If no number of
        components exceeds it (e.g. ``1.0``), all components are kept.

    Returns
    -------
    model_r
        A deep copy of ``model`` carrying the reconstructed weights.
    """
    model_r = deepcopy(model)
    weight_dict = model_r.state_dict()
    weight_dict_new = deepcopy(weight_dict)
    for layer_name in weight_dict.keys():
        w = weight_dict[layer_name]

        # PCA only makes sense for 2-D matrices; leave biases and any
        # 1-D / N-D "weight" entries exactly as they are.
        if 'weight' not in layer_name or w.dim() != 2:
            continue

        # sklearn needs a host-side numpy array (also covers CUDA models)
        w_np = w.cpu().numpy()

        # get number of components
        # n_components may not exceed min(n_samples, n_features); asking for
        # w.shape[1] components fails whenever the matrix has fewer rows
        # than columns.
        pca = PCA(n_components=min(w_np.shape))
        pca.fit(w_np)
        cum_explained = np.cumsum(pca.explained_variance_ratio_)
        # first index whose cumulative ratio is strictly greater than the
        # threshold, converted to a count and clamped so that we never ask
        # for more components than exist
        dim = int(np.searchsorted(cum_explained, percent_to_explain, side='right')) + 1
        dim = min(dim, len(cum_explained))

        # actually project
        pca = PCA(n_components=dim)
        w2 = pca.inverse_transform(pca.fit_transform(w_np))
        print('shapes', w.shape, w2.shape)
        weight_dict_new[layer_name] = torch.Tensor(w2)

    model_r.load_state_dict(weight_dict_new)
    return model_r

             
# Build an MnistNet (no weights are loaded here, so presumably it is freshly
# initialised -- confirm in models.py) and a PCA-reduced copy of it.
# reduce_model prints one 'shapes' line per weight matrix it processes.
modelm = models.MnistNet()        
modelr = reduce_model(modelm)

shapes torch.Size([500, 784]) (500, 784)
shapes torch.Size([256, 500]) (256, 500)
shapes torch.Size([10, 256]) (10, 256)


In [2]:
# Sanity check: shape of the first 10 rows of the 'fc1.weight' matrix after
# conversion to a numpy array.
print(modelm.state_dict()['fc1.weight'][:10].numpy().shape)

(10, 784)


In [17]:
# Experiment parameters; this cell reads p.seed and p.dset, and a later cell
# reads p.calc_activations.
from params_vision import p

# Seed numpy and torch from the shared parameter object for reproducibility.
np.random.seed(p.seed)
torch.manual_seed(p.seed)
use_cuda = torch.cuda.is_available()

batch_size = 100
# NOTE(review): hard-coded absolute, machine-specific data directory; consider
# making it configurable.
root = oj('/scratch/users/vision/yu_dl/raaz.rsk/data', p.dset)
# makedirs creates missing parent directories too, and exist_ok avoids the
# check-then-create race of `if not exists: mkdir`.
os.makedirs(root, exist_ok=True)


## load mnist dataset

# Convert images to tensors, then normalise with mean 0.5 and std 1.0.
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
train_loader = torch.utils.data.DataLoader(
                 dataset=train_set,
                 batch_size=batch_size,
                 shuffle=True)
test_loader = torch.utils.data.DataLoader(
                dataset=test_set,
                batch_size=batch_size,
                shuffle=False)

# Model under analysis; moved to the GPU when one is available.
model = models.MnistNet()
if use_cuda:
    model = model.cuda()
 
    
def calc_activation_dims(use_cuda, model, dset_train, dset_test, calc_activations=0):
    """Print activation shapes and explained-variance shapes for each dataset.

    For ``dset_train`` and then ``dset_test``, the first batch of
    ``calc_activations`` unshuffled samples is passed through
    ``model.forward_all``. Each returned activation is moved to the CPU,
    converted to numpy and transposed, and its shape is printed. The resulting
    dict is then handed to ``get_explained_var_from_weight_dict`` (with
    ``activation=True``) and the shape of every returned entry is printed.

    Nothing happens unless ``calc_activations`` is greater than zero.
    The function only prints; it always returns ``None``.
    """
    if not calc_activations > 0:
        return

    for dataset in (dset_train, dset_test):
        batches = torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=calc_activations,
            shuffle=False)

        # just use 1 big batch: only the first batch is ever processed
        for x, target in batches:
            if use_cuda:
                x, target = x.cuda(), target.cuda()
            raw = model.forward_all(Variable(x, volatile=True))

            # layer name -> transposed numpy copy of that layer's activations
            y = {}
            for name in raw.keys():
                y[name] = raw[name].data.cpu().numpy().T
            for name in y.keys():
                print(name, y[name].shape)
            break

        act_var_dict = get_explained_var_from_weight_dict(y, activation=True)
        for name in act_var_dict:
            print('vars', name, act_var_dict[name].shape)



        

# Show the configured value that is passed below as calc_activations
# (used there as the DataLoader batch size).
print(p.calc_activations)

# Print activation / explained-variance shapes for the train and test sets.
calc_activation_dims(use_cuda, model, train_set, test_set, calc_activations=p.calc_activations)

1000
relu2 (256, 1000)
fc1 (500, 1000)
relu1 (500, 1000)
fc2 (256, 1000)
fc3 (10, 1000)
shape (256, 1000) ncomps= 1000
shape (500, 1000) ncomps= 1000
shape (500, 1000) ncomps= 1000
shape (256, 1000) ncomps= 1000
shape (10, 1000) ncomps= 1000
vars relu2 (256,)
vars fc1 (500,)
vars relu1 (500,)
vars fc2 (256,)
vars fc3 (10,)
relu2 (256, 1000)
fc1 (500, 1000)
relu1 (500, 1000)
fc2 (256, 1000)
fc3 (10, 1000)
shape (256, 1000) ncomps= 1000
shape (500, 1000) ncomps= 1000
shape (500, 1000) ncomps= 1000
shape (256, 1000) ncomps= 1000
shape (10, 1000) ncomps= 1000
vars relu2 (256,)
vars fc1 (500,)
vars relu1 (500,)
vars fc2 (256,)
vars fc3 (10,)
