In [2]:
import torch
import numpy as np
from model_creator import make_conv2d_model, mod
import tensorly as tl
import tensorly.decomposition as dc
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import scale, normalize
from sklearn.model_selection import cross_val_score
import torchvision.transforms as transforms
import torchvision
from pathlib import Path
from tqdm import tqdm
from collections import Counter
from utils import *
import matplotlib.pyplot as plt
from torch.nn import Sequential, Conv2d
tl.set_backend('pytorch')
device='cpu'
%load_ext autoreload
%autoreload 2

ModuleNotFoundError: No module named 'torchvision'

In [2]:
params = {}
params['n_conv_layers'] = 1
params['use_batch_norm'] = False
params['input_kernel_size'] = (5, 5)
params['conv_dim_change'] = 'double'
params['pool_size'] = (2, 2)
params['activation'] = 'relu'
params['n_dense_layers'] = 0
params['conv_kernel_size'] = (3, 3)
params['initial_kernel_number'] = 15
params['dense_dim'] = 100

In [3]:
# model = mod(*make_conv2d_model((3, 32, 32), 10, params)).to(device)

In [3]:
model = torchvision.models.wide_resnet50_2(pretrained=True)

In [4]:
model.fc = torch.nn.Linear(2048, 200)
experiment = 303
seed = 303
model_id = f"{experiment}_{seed}"
model_path = Path(f"outputs/experiment_{experiment}/data/{model_id}").rglob('*pt')
model_path = list(model_path)[0]
model.load_state_dict(torch.load(model_path))
model.to(device)
print('loaded')

loaded


In [5]:
conv_layers = [model.conv1]
for sequential in [model.layer1, model.layer2, model.layer3, model.layer4]:
    for bottleneck in sequential:
        b = bottleneck
        conv_layers.extend([bottleneck.conv1, bottleneck.conv2, bottleneck.conv3])

In [6]:
b

Bottleneck(
  (conv1): Conv2d(2048, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
)

In [43]:
ls = [a for a in dir(b) if a in bneck_layers]

NameError: name 'x' is not defined

In [8]:
def calculate_vips(pls_model):
    t = pls_model.x_scores_
    w = pls_model.x_weights_
    q = pls_model.y_loadings_
    p, h = w.shape
    vips = np.zeros((p,))
    s = np.diag(np.matmul(np.matmul(np.matmul(t.T,t),q.T), q)).reshape(h, -1)
    total_s = np.sum(s)
    for i in range(p):
        weight = np.array([ (w[i,j] / np.linalg.norm(w[:,j]))**2 for j in range(h) ])
        vips[i] = np.sqrt(p*(np.matmul(s.T, weight))/total_s)
    return vips

In [65]:
cv = RepeatedKFold(n_splits=4, n_repeats=1)

In [8]:
bneck_layers = ['conv1', 'bn1', 'conv2', 'bn2', 'conv3', 'bn3', 'relu']

In [5]:
batch_size = 32
data_dir = Path('tiny-imagenet-200')
data_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.4805, 0.4483, 0.3978), (0.263, 0.257, 0.267)),
])
valid_set = torchvision.datasets.ImageFolder(data_dir / 'val', data_transforms)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size,
                                           shuffle=False, pin_memory=True)
train_set = torchvision.datasets.ImageFolder(data_dir / 'train', data_transforms)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                           shuffle=True, pin_memory=True)

In [6]:
def select_filters(model, valid_loader, valid_set, remove_percent, device):
    """
    worst : list of highest divergence filters (worst filters) across batches
            Can select top-k afterwards.
    imp   : list of divergences from tensor decomposition reconstruction.
            lower means filter is more important.
    """
    worst = []
    model.eval()
    num_layers = 0
    for i, data in tqdm(enumerate(valid_loader),
                        total=len(valid_set) / valid_loader.batch_size):
        out, y = data
        out = out.to(device)
        y = y
        sizes = []
        num_lay = 0
        for j, (name, param) in enumerate(model.named_children()):
            if name in ['avgpool', 'layer3']:
                break
            if type(param) == Sequential:
                for bottle in param:
                    for b in bneck_layers:
                        out = getattr(bottle, b)(out)
                        if b in ['conv1', 'conv2', 'conv3']:
                            nout = out.detach().clone()
                            num_rem = int(nout.shape[1] * remove_percent)
                            cp = dc.tucker(nout, 15)
                            pred = tl.tucker_tensor.tucker_to_tensor(cp)
                            dist = torch.cdist(pred, nout)
                            importance = torch.mean(dist, dim=[0, 2, 3])
                            _, w = torch.topk(importance, num_rem)
                            worst.append(w)
                            num_lay += 1
                            
            else:
                out = param(out)
                if type(param) == Conv2d:
                    nout = out.detach().clone()
                    num_rem = int(nout.shape[1] * remove_percent)
                    cp = dc.tucker(nout, 15)
                    pred = tl.tucker_tensor.tucker_to_tensor(cp)
                    dist = torch.cdist(pred, nout)
                    importance = torch.mean(dist, dim=[0, 2, 3])
                    _, w = torch.topk(importance, num_rem)
                    worst.append(w)
                    num_lay += 1
        if i * valid_loader.batch_size >= 100:
            num_layers = num_lay
            break
    return worst, num_layers

In [9]:
worst, num_lay = select_filters(model, valid_loader, valid_set, 0.5)

  0%|          | 1/312.5 [00:10<54:51, 10.57s/it]


KeyboardInterrupt: 

In [11]:
bad_filt = []
for i in range(num_lay):
    bad_filt.append(worst[i::num_lay])

In [12]:
bye_filt = []
for f in bad_filt:
    rem_filt = [k for k in Counter(torch.stack(f).view(-1).cpu().numpy()).keys()]
    bye_filt.append(rem_filt)
    print(rem_filt, ',')

[62, 47, 8, 31, 3, 25, 53, 10, 21, 58, 29, 11, 43, 61, 54, 63, 16, 49, 42, 48, 28, 5, 7, 60, 40, 6, 12, 41, 9, 44, 2, 45, 32, 4, 39, 55, 34] ,
[70, 51, 99, 7, 6, 43, 75, 47, 56, 44, 120, 4, 21, 100, 86, 62, 26, 109, 10, 39, 54, 110, 0, 46, 61, 123, 35, 79, 22, 117, 9, 96, 63, 14, 32, 59, 104, 115, 119, 5, 36, 57, 121, 82, 60, 19, 125, 64, 73, 88, 34, 106, 89, 95, 42, 65, 27, 102, 45, 127, 101, 50, 68, 81, 13, 2, 17, 97] ,
[82, 56, 11, 48, 12, 54, 77, 118, 5, 3, 26, 20, 58, 55, 24, 126, 23, 42, 127, 2, 35, 29, 124, 97, 63, 22, 81, 33, 78, 90, 17, 83, 101, 93, 70, 27, 1, 85, 79, 37, 46, 87, 86, 18, 38, 84, 15, 64, 75, 59, 9, 116, 112, 28, 61, 51, 73, 94, 123, 74, 109, 71, 120, 66, 105, 43, 57, 92] ,
[135, 118, 107, 152, 40, 128, 186, 167, 240, 156, 143, 223, 216, 102, 196, 133, 109, 154, 103, 96, 6, 72, 211, 251, 146, 105, 125, 244, 238, 44, 37, 90, 120, 218, 115, 67, 53, 101, 246, 57, 91, 199, 13, 39, 5, 68, 197, 208, 229, 41, 226, 108, 7, 34, 235, 88, 159, 81, 248, 54, 230, 155, 180, 7

In [38]:
bye_filt

[[62, 47, 8, 3, 10],
 [70, 51, 99, 7, 6, 43, 75],
 [82, 56, 11, 48, 12, 54, 26],
 [135, 118, 107, 152, 40, 128, 186, 167, 240, 156, 143, 223, 105, 216],
 [95, 111, 4, 22, 104, 46, 107, 99, 91],
 [70, 119, 25, 100, 94, 39, 22, 73, 116],
 [203, 98, 80, 0, 199, 96, 239, 218, 166, 157, 146, 176, 13, 73],
 [126, 20, 89, 27, 7, 86, 3, 28, 16, 110],
 [112, 24, 82, 38, 0, 20],
 [22, 65, 15, 100, 179, 255, 217, 133, 71, 74, 169, 174, 82],
 [95, 113, 56, 78, 255, 250, 127, 74, 210, 197, 108, 121, 31, 129, 12],
 [82, 89, 95, 63, 246, 179, 129, 218, 70, 204, 18, 111, 103, 102, 174, 175],
 [468,
  492,
  277,
  303,
  353,
  157,
  365,
  231,
  481,
  223,
  508,
  200,
  262,
  119,
  66,
  96,
  35,
  124,
  479,
  29,
  291,
  227,
  182,
  235,
  245,
  169,
  472,
  260,
  22,
  345,
  382,
  188],
 [109, 29, 145, 234, 100, 123, 16, 3, 193, 149, 178, 210, 200, 214, 141],
 [2, 227, 37, 17, 99, 186, 82, 161, 125, 217, 6, 79, 149],
 [300,
  112,
  293,
  478,
  414,
  445,
  397,
  437,
  338,
 

In [136]:
[k for k in Counter(torch.stack(worst).view(-1).cpu().numpy()).keys()]

[10, 3, 53]

In [36]:
conv_layers

[Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False),
 Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False),
 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
 Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
 Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False),
 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
 Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
 Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False),
 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
 Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
 Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),
 Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
 Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False),
 Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False),


In [83]:
torch.min(torch.stack(imp))

In [72]:
model = torchvision.models.resnet18(pretrained=True)

In [73]:
TuckerStructured(model.conv1, name='weight', amount=0, dim=-2,filt=70)

torch.Size([64, 3, 7, 7]) 4
tensor([[[1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1., 1.]]])


Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [75]:
model.conv1.weight_mask[0]

tensor([[[0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0.]]])

In [490]:
# dataloader = iter(valid_loader)
x, y = next(dataloader)
x = x.to(device)
ny = y.numpy()

In [491]:
out = model(x)
nout = out.detach()

In [492]:
osample = nout[0]
print(osample.is_cuda)

True


In [503]:
cp = dc.tucker(osample, 22)



In [494]:
print(len(cp[1])) 

4


In [504]:
for i in range(len(cp[1])):
    print(cp[1][i].shape)

torch.Size([15, 15])
torch.Size([28, 22])
torch.Size([28, 22])


In [505]:
# pred = tl.cp_tensor.cp_to_tensor(cp)
pred = tl.tucker_tensor.tucker_to_tensor(cp)

In [506]:
pred.shape

torch.Size([15, 28, 28])

In [507]:
dist = torch.cdist(pred, osample)

In [508]:
dist.shape

torch.Size([15, 28, 28])

In [510]:
importance = torch.mean(dist, dim=[1, 2])

In [511]:
torch.argmin(importance)

tensor(0, device='cuda:0')

In [512]:
importance

tensor([0.6711, 1.2247, 1.7894, 0.7869, 2.0999, 1.8570, 1.0078, 1.4888, 0.9308,
        1.2620, 1.3241, 0.8901, 0.8965, 1.7836, 1.5718], device='cuda:0')

tensor([0.7249, 1.8099, 1.4779, 1.0375, 2.1402, 1.5931, 1.0717, 1.4681, 1.1761,
        1.3171, 1.2156, 1.6594, 1.1599, 2.0530, 1.3948], device='cuda:0')

In [479]:
# bigcp = cp
# imp = importance
print(imp)
torch.mean(torch.cdist(bigcp[1][0], cp[1][1]))

tensor([0.6309, 1.6372, 1.2259, 0.7888, 2.0096, 1.4350, 0.9153, 1.2241, 0.8291,
        1.1813, 1.0735, 1.5589, 0.9121, 1.8906, 1.2141], device='cuda:0')


tensor(7.8082, device='cuda:0')

In [477]:
bigcp[1][0].shape, cp[1][1].shape

(torch.Size([15, 22]), torch.Size([15, 22]))

In [426]:
a = torch.outer(torch.tensor(normalize([[1., 5, 0]])).squeeze(0),
                torch.tensor(normalize([[1., 1, 1]])).squeeze(0))
a += torch.outer(torch.tensor(normalize([[1., 50, 0]])).squeeze(0),
                torch.tensor(normalize([[1., 1, 1]])).squeeze(0))
c = torch.tensor(normalize([[0, 5, 1], [0, 5., 1], [5, 0, 5]]))

In [427]:
b = dc.parafac(c, 1)



In [428]:
pred = tl.cp_tensor.cp_to_tensor(b)

In [429]:
dist = torch.cdist(pred, c)

In [430]:
importance = torch.mean(dist, dim=[1])

In [431]:
importance

tensor([0.4884, 0.4884, 0.8133], dtype=torch.float64)

In [432]:
pred

tensor([[0.0913, 0.9467, 0.2806],
        [0.0913, 0.9467, 0.2806],
        [0.0244, 0.2532, 0.0751]], dtype=torch.float64)

In [433]:
dist

tensor([[0.1289, 0.1289, 1.2072],
        [0.1289, 0.1289, 1.2072],
        [0.7378, 0.7378, 0.9642]], dtype=torch.float64)

In [403]:
b[1]

[tensor([[5.0990],
         [5.0990],
         [5.0990]]),
 tensor([[0.0000],
         [0.9806],
         [0.1961]])]