In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import time
import torch.nn.functional as F
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision import models

In [3]:
# check GPU availability
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [3]:
from col_mnist import ColMNIST
trainloader = torch.utils.data.DataLoader(
  ColMNIST('data/mnist', train=True, download=True,
                             transform=torchvision.transforms.Compose([#torchvision.transforms.Resize((224, 224)),
                               torchvision.transforms.ToTensor(),
                             ])),
  batch_size=32, shuffle=True)

testloader = torch.utils.data.DataLoader(
  ColMNIST('data/mnist', train=False, download=True,
                             transform=torchvision.transforms.Compose([#torchvision.transforms.Resize((224, 224)),
                               torchvision.transforms.ToTensor()
                             ])),
  batch_size=32, shuffle=True)

In [5]:
from three_d_shapes_ds import ThreeDShapes
trainloader = torch.utils.data.DataLoader(
                                      ThreeDShapes(transform=torchvision.transforms.Compose([torchvision.transforms.ToPILImage(), torchvision.transforms.Resize((32, 32)),
                                        torchvision.transforms.ToTensor(),
                                      ])),
                                      batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(
                                      ThreeDShapes(transform=torchvision.transforms.Compose([torchvision.transforms.ToPILImage(), torchvision.transforms.Resize((32, 32)),
                                        torchvision.transforms.ToTensor(),
                                      ])),
                                      batch_size=32, shuffle=True)

In [7]:
l = ThreeDShapes(transform=torchvision.transforms.Compose([torchvision.transforms.ToPILImage(), 
                                                           torchvision.transforms.Resize((32, 32)), 
                                                           torchvision.transforms.ToTensor()])).labels

In [30]:
import numpy as np
loc = np.where((l[:, 0] <= 0.1) & (l[:, 1] <= 0.1) & (l[:, 2] <= 0.1) & (l[:, 3] <= 0.85) & (l[:, 4] <= 1) & (l[:, 5] <= -25))

In [34]:
l[loc[0]]

array([[  0.        ,   0.        ,   0.        ,   0.75      ,
          0.        , -30.        ],
       [  0.        ,   0.        ,   0.        ,   0.75      ,
          0.        , -25.71428571],
       [  0.        ,   0.        ,   0.        ,   0.75      ,
          1.        , -30.        ],
       [  0.        ,   0.        ,   0.        ,   0.75      ,
          1.        , -25.71428571],
       [  0.        ,   0.        ,   0.        ,   0.82142857,
          0.        , -30.        ],
       [  0.        ,   0.        ,   0.        ,   0.82142857,
          0.        , -25.71428571],
       [  0.        ,   0.        ,   0.        ,   0.82142857,
          1.        , -30.        ],
       [  0.        ,   0.        ,   0.        ,   0.82142857,
          1.        , -25.71428571],
       [  0.        ,   0.        ,   0.1       ,   0.75      ,
          0.        , -30.        ],
       [  0.        ,   0.        ,   0.1       ,   0.75      ,
          0.        , -25.7

In [33]:
loc[0]

0

In [6]:
ds = ThreeDShapes(transform=torchvision.transforms.Compose([torchvision.transforms.ToPILImage(), 
                                                           torchvision.transforms.Resize((32, 32)), 
                                                           torchvision.transforms.ToTensor()]), filtered = True)

TypeError: Simple selection can't process array([], dtype=int64)

In [32]:
vgg16 = models.vgg16(pretrained=True)
vgg16.to(device)
print(vgg16)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [33]:
# change the number of classes 
vgg16.classifier[6].out_features = 30
# freeze convolution weights
for param in vgg16.features.parameters():
    param.requires_grad = True

In [34]:
from models import DisentangledLinear, BlockDropout

n_classes = 30


vgg16.classifier[3] = DisentangledLinear(vgg16.classifier[3].in_features, vgg16.classifier[3].out_features).to(device)
vgg16.classifier[6] = DisentangledLinear(vgg16.classifier[6].in_features, n_classes).to(device)
vgg16.classifier[5] = BlockDropout(vgg16.classifier[6], ncc=2, apply_to="in")

for param in vgg16.features.parameters():
    param.requires_grad = True

In [35]:
# optimizer
optimizer = optim.SGD(vgg16.classifier.parameters(), lr=0.001, momentum=0.9)
# loss function
criterion = nn.CrossEntropyLoss()

In [36]:
# validation function
def validate(model, test_dataloader):
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0
    for int, data in enumerate(test_dataloader):
        data, (target, dclr_idx, bclr_idx) = data[0], data[1]
        target += bclr_idx*10
        data = data.to(device)
        target = target.to(device)
        output = model(data)
        loss = criterion(output, target)
        
        val_running_loss += loss.item()
        _, preds = torch.max(output.data, 1)
        val_running_correct += (preds == target).sum().item()
    
    val_loss = val_running_loss/len(test_dataloader.dataset)
    val_accuracy = 100. * val_running_correct/len(test_dataloader.dataset)
    print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}')
    
    return val_loss, val_accuracy

In [37]:
def neuron_wise_br(model, layer, blocks, examples, ncc):
    model.eval()
    relative_error = [None]*layer.out_features
    for n in range(layer.out_features):
        mask = torch.ones(layer.out_features, dtype=torch.bool, device=device)
        mask[n] = 0
        a_n = normalize_w(layer.weight[mask])
        _, s, _ = torch.svd(a_n)
        relative_error[n] = ncc - torch.sum(s[ncc]).detach().cpu()
    return relative_error

In [38]:
from models import block_regularizer, compute_layer_blocks_in
from spectral_utils import normalize_w
import numpy as np
ncc = 2 #number of connected components

def prune(model, layer_out, layer_in, ncc):
    blocks = compute_layer_blocks_in(layer_out, ncc)
    for batch_features in testloader:
        batch_features = batch_features[0]
        test_examples = batch_features.to(device)
        break
    re = neuron_wise_br(model, layer_out, blocks, test_examples, ncc)
    removal_mask = torch.ones(layer_out.in_features, dtype=torch.bool)
    removal_mask[np.argmin(re)] = 0
    layer_out.remove_neurons_in(removal_mask)
    layer_in.remove_neurons_out(removal_mask)
    optimizer = optim.SGD(vgg16.classifier.parameters(), lr=0.001, momentum=0.9)

In [41]:
# training function
n_epochs = 30
total_batches = len(trainloader)*n_epochs
layer_size_reduction = 1000
def fit(model, train_dataloader):
    model.train()
    train_running_loss = 0.0
    train_running_correct = 0
    for i, data in enumerate(train_dataloader):
        data, (target, dclr_idx, bclr_idx) = data[0], data[1]
        target += bclr_idx*10
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        train_running_loss += loss.item()
        _, preds = torch.max(output.data, 1)
        train_running_correct += (preds == target).sum().item()
        loss.backward()
        optimizer.step()
        if (i)%(total_batches/(layer_size_reduction)) == 0:
            block_reg = block_regularizer(model.classifier[6], ncc)
            print("Block regularizer "+str(block_reg.item()))
            prune(model, vgg16.classifier[6], vgg16.classifier[3], ncc)
            
    train_loss = train_running_loss/len(train_dataloader.dataset)
    train_accuracy = 100. * train_running_correct/len(train_dataloader.dataset)
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}')
    
    return train_loss, train_accuracy

In [42]:
train_loss , train_accuracy = [], []
val_loss , val_accuracy, br = [], [], []
start = time.time()
for epoch in range(n_epochs):
    print(epoch)
    train_epoch_loss, train_epoch_accuracy = fit(vgg16, trainloader)
    val_epoch_loss, val_epoch_accuracy = validate(vgg16, testloader)
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_loss.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
    torch.save(vgg16, 'vgg16disen_e'+str(epoch)+'.pt')
end = time.time()
print((end-start)/60, 'minutes')

0
tensor([[  0.3000,   0.4000,   0.4000,   0.8214,   1.0000,  17.1429],
        [  0.5000,   0.8000,   0.8000,   0.7500,   2.0000,  12.8571],
        [  0.7000,   0.5000,   0.3000,   1.1071,   3.0000,  17.1429],
        [  0.3000,   0.5000,   0.3000,   1.2500,   1.0000, -12.8571],
        [  0.2000,   0.9000,   0.7000,   0.8929,   1.0000,  25.7143],
        [  0.4000,   0.3000,   0.8000,   1.0357,   0.0000,  -8.5714],
        [  0.4000,   0.8000,   0.2000,   1.1071,   3.0000,   0.0000],
        [  0.3000,   0.2000,   0.2000,   0.8214,   2.0000,  12.8571],
        [  0.7000,   0.1000,   0.3000,   1.1071,   2.0000,  -4.2857],
        [  0.7000,   0.6000,   0.7000,   0.8929,   1.0000,  25.7143],
        [  0.0000,   0.3000,   0.2000,   1.0357,   2.0000,   4.2857],
        [  0.4000,   0.6000,   0.4000,   1.1071,   0.0000,  17.1429],
        [  0.9000,   0.4000,   0.2000,   1.1786,   3.0000,  -8.5714],
        [  0.1000,   0.8000,   0.3000,   0.8214,   3.0000, -17.1429],
        [  0.8000,

RuntimeError: Expected object of scalar type Long but got scalar type Double for argument #2 'target' in call to _thnn_nll_loss_forward

In [None]:
plt.figure(figsize=(10, 7))
plt.plot(train_accuracy, color='green', label='train accuracy')
plt.plot(val_accuracy, color='blue', label='validataion accuracy')
plt.legend()
plt.savefig('accuracy.png')
plt.show()

In [None]:
plt.figure(figsize=(10, 7))
plt.plot(train_loss, color='orange', label='train loss')
plt.plot(val_loss, color='red', label='validataion loss')
plt.legend()
plt.savefig('loss.png')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
from col_mnist import ColMNIST

plt.rcParams["axes.grid"] = False
device = "cuda" if torch.cuda.is_available() else "cpu"

def imshow(img):
    #img = img / 2 + 0.5     # unnormalize
    npimg = img.cpu().numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

dataiter = iter(testloader)
images, (target, dclr_idx, bclr_idx) = dataiter.next()
target += bclr_idx*10
imshow(torchvision.utils.make_grid(images))
img_shape = images[0].shape
print("Image shape: {}".format(img_shape))
print(target)

In [None]:
output = vgg16(images)
output.argmax(dim=1)

In [None]:
from explainn_code.grab_functions import db_from_dat_with_labels, write_pic_as_sets

tail = []
head = []

for i, data in enumerate(train_dataloader):
    data, (target, dclr_idx, bclr_idx) = data[0], data[1]
    target += bclr_idx*10
    data = data.to(device)
    target = target.to(device)
    output = model(data).cpu()

    head.append(vgg16.classifier[6].detach().cpu().numpy())
    tail.append(target.detach().cpu().numpy())
head = np.concatenate(head)
tail = np.concatenate(tail)

In [None]:
write_pic_as_sets(head, "vgg_head_blocked_v1.data")

In [None]:
write_pic_as_sets(tail "tail_v1.dat")

In [None]:
!conda info --envs