In [0]:
import numpy as np
import pandas as pd
import math
from matplotlib import pyplot as plt


import torch
import torchvision
import torchvision.transforms as transforms


import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from tqdm import tqdm

%matplotlib inline

In [0]:
transform = transforms.Compose(
    [transforms.CenterCrop((28,28)),transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# transform = transforms.Compose(
#     [transforms.ToTensor(),transforms.CenterCrop(28,28)])

In [0]:
cifar_trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
cifar_testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


170500096it [00:04, 42154560.84it/s]                               


Files already downloaded and verified


In [0]:
cifar_trainset_random = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

cifar_trainset_random.targets[:50000] = np.random.randint(low=0,high=9,size=50000)

Files already downloaded and verified


In [0]:
np.unique(cifar_trainset.targets)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [0]:
trainloader = torch.utils.data.DataLoader(cifar_trainset, batch_size=256,
                                          shuffle=False, num_workers=2)
testloader = torch.utils.data.DataLoader(cifar_testset, batch_size=256,
                                         shuffle=False, num_workers=2)

In [0]:
trainloader_random = torch.utils.data.DataLoader(cifar_trainset_random,batch_size=256,shuffle=False,num_workers=2)

In [0]:
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [0]:
def xavierweights_init(m):
    if isinstance(m,nn.Linear):
        size = m.weight.size()
        fan_out = size[0] # number of rows
        fan_in = size[1] # number of columns
        variance = np.sqrt(2.0/(fan_in + fan_out))
        
        m.weight.data.normal_(0.0, variance)
        #print(m.weight.data.normal_(0.0, variance))


In [0]:
def uniformweights_init(m):
    if isinstance(m,nn.Linear):
        stdv = 1. / math.sqrt(m.weight.size(1))
        
        m.weight.data.uniform_(-stdv,stdv)
        
        if m.bias is not None:
             m.bias.data.uniform_(-stdv, stdv)

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(3*28*28,512)
        self.fc2 = nn.Linear(512,512)
        self.fc3 = nn.Linear(512,512)
        self.fc4 = nn.Linear(512,10)
#         self.conv1 = nn.Conv2d(3, 6, 5)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(6, 16, 5)
#         self.fc1 = nn.Linear(16 * 5 * 5, 120)
#         self.fc2 = nn.Linear(120, 84)
#         self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        #x = x - x.mean(dim=(0,2),keepdim=True)/x.std(dim=(0,2),keepdim=True)
        
        x = (x.view(-1,3*28*28))
        x = self.fc1(x)
        x1 = F.relu(x)
        x2 = F.relu(self.fc2(x1))
        x3 = F.relu(self.fc3(x2))
        x = self.fc4(x3)
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = x.view(-1, 16 * 5 * 5)
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)
        return x,{'layer1':x1,'layer2':x2,'layer3':x3}


net1 = Net()
net2 = Net()

In [0]:
net1.apply(xavierweights_init)

Net(
  (fc1): Linear(in_features=2352, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=512, bias=True)
  (fc4): Linear(in_features=512, out_features=10, bias=True)
)

In [0]:
par_xavier = {}
params = list(net1.parameters())
i = 0
for p in params:
    i = i+1
    par_xavier["layer"+str(i)] = p.cpu().data
    #print(p)
par_xavier["layer1"]

tensor([[ 0.0139,  0.0099,  0.0121,  ...,  0.0657, -0.0256,  0.0458],
        [-0.0415,  0.0351, -0.0397,  ...,  0.0231, -0.0255, -0.0152],
        [-0.0394,  0.0068,  0.0057,  ...,  0.0590,  0.0176,  0.0040],
        ...,
        [ 0.0150,  0.0272,  0.0011,  ..., -0.0394, -0.0486, -0.0107],
        [ 0.0013, -0.0607, -0.0101,  ..., -0.0152,  0.0032,  0.0138],
        [-0.0200,  0.0160,  0.0257,  ..., -0.0238,  0.0170,  0.0548]])

In [0]:
net2.apply(uniformweights_init)

Net(
  (fc1): Linear(in_features=2352, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=512, bias=True)
  (fc4): Linear(in_features=512, out_features=10, bias=True)
)

In [0]:
params = list(net2.parameters())
par_uniform = {}
i = 0  
for p in params:
    i = i+1
    par_uniform["layer"+str(i)] = p.cpu().data
    #print(p.cpu().data)
print(par_uniform["layer1"])

tensor([[ 0.0164,  0.0135,  0.0031,  ...,  0.0111, -0.0008,  0.0096],
        [-0.0077, -0.0118, -0.0122,  ..., -0.0170, -0.0077, -0.0086],
        [ 0.0006, -0.0016, -0.0036,  ...,  0.0048, -0.0180, -0.0095],
        ...,
        [-0.0012, -0.0180,  0.0098,  ..., -0.0045, -0.0187,  0.0080],
        [ 0.0077, -0.0079,  0.0067,  ..., -0.0132, -0.0188,  0.0048],
        [ 0.0109,  0.0118,  0.0195,  ..., -0.0002,  0.0020, -0.0008]])


In [0]:
par_uniform["layer1"].shape
np.linalg.norm((par_uniform["layer1"] - par_xavier["layer1"]))

31.819374

In [0]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net1.parameters(), lr=0.01, momentum=0.9)

In [19]:
act = []
loss_cur = []
for epoch in range(100):  # loop over the dataset multiple times
    ep_loss = []
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs,_ = net1(inputs)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        

        if i % 50 == 49:    # print every 50 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 50))
            ep_loss.append(running_loss) # loss per minibatch
            running_loss = 0.0
            
    loss_cur.append(np.mean(ep_loss))   #loss per epoch
    if (epoch%5 == 0):
        _,acts= net1(inputs)
        act.append(acts)

print('Finished Training')

[1,    50] loss: 2.039
[1,   100] loss: 1.762
[1,   150] loss: 1.676
[2,    50] loss: 1.551
[2,   100] loss: 1.502
[2,   150] loss: 1.493
[3,    50] loss: 1.411
[3,   100] loss: 1.374
[3,   150] loss: 1.377
[4,    50] loss: 1.309
[4,   100] loss: 1.276
[4,   150] loss: 1.286
[5,    50] loss: 1.223
[5,   100] loss: 1.191
[5,   150] loss: 1.203
[6,    50] loss: 1.143
[6,   100] loss: 1.115
[6,   150] loss: 1.126
[7,    50] loss: 1.065
[7,   100] loss: 1.039
[7,   150] loss: 1.049
[8,    50] loss: 0.988
[8,   100] loss: 0.965
[8,   150] loss: 0.972
[9,    50] loss: 0.913
[9,   100] loss: 0.891
[9,   150] loss: 0.896
[10,    50] loss: 0.841
[10,   100] loss: 0.820
[10,   150] loss: 0.822
[11,    50] loss: 0.768
[11,   100] loss: 0.753
[11,   150] loss: 0.749
[12,    50] loss: 0.696
[12,   100] loss: 0.685
[12,   150] loss: 0.683
[13,    50] loss: 0.631
[13,   100] loss: 0.626
[13,   150] loss: 0.630
[14,    50] loss: 0.572
[14,   100] loss: 0.582
[14,   150] loss: 0.610
[15,    50] loss: 0

In [0]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net2.parameters(), lr=0.01, momentum=0.9)

In [21]:
actu = []
lossu_cur = []
for epoch in range(100):  # loop over the dataset multiple times
    ep_lossu = []
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs,_ = net2(inputs)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        

        if i % 50 == 49:    # print every 50 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 50))
            ep_lossu.append(running_loss) # loss per minibatch
            running_loss = 0.0
            
    lossu_cur.append(np.mean(ep_lossu))   #loss per epoch
    if (epoch%5 == 0):
        _,acts= net2(inputs)
        actu.append(acts)

print('Finished Training')

[1,    50] loss: 2.281
[1,   100] loss: 2.141
[1,   150] loss: 1.990
[2,    50] loss: 1.805
[2,   100] loss: 1.730
[2,   150] loss: 1.692
[3,    50] loss: 1.610
[3,   100] loss: 1.562
[3,   150] loss: 1.550
[4,    50] loss: 1.480
[4,   100] loss: 1.448
[4,   150] loss: 1.447
[5,    50] loss: 1.387
[5,   100] loss: 1.364
[5,   150] loss: 1.371
[6,    50] loss: 1.316
[6,   100] loss: 1.294
[6,   150] loss: 1.306
[7,    50] loss: 1.251
[7,   100] loss: 1.230
[7,   150] loss: 1.247
[8,    50] loss: 1.192
[8,   100] loss: 1.170
[8,   150] loss: 1.191
[9,    50] loss: 1.134
[9,   100] loss: 1.113
[9,   150] loss: 1.137
[10,    50] loss: 1.075
[10,   100] loss: 1.059
[10,   150] loss: 1.080
[11,    50] loss: 1.018
[11,   100] loss: 1.004
[11,   150] loss: 1.022
[12,    50] loss: 0.962
[12,   100] loss: 0.950
[12,   150] loss: 0.963
[13,    50] loss: 0.904
[13,   100] loss: 0.897
[13,   150] loss: 0.908
[14,    50] loss: 0.852
[14,   100] loss: 0.846
[14,   150] loss: 0.851
[15,    50] loss: 0

In [0]:
ac = (act[0]["layer1"].cpu()).detach().numpy()

In [0]:
acu = (actu[0]["layer1"].cpu()).detach().numpy()

In [0]:
ac1 = (act[1]["layer1"].cpu()).detach().numpy()
acu1 = (actu[1]["layer1"].cpu()).detach().numpy()

In [0]:
ace = (act[-1]["layer1"].cpu()).detach().numpy()
acue = (actu[-1]["layer1"].cpu()).detach().numpy()

In [26]:
np.linalg.norm(acue>0 - (ace>0))

160.22484201895784

In [27]:
act[-1]["layer1"]>0,actu[-1]["layer1"]>0

(tensor([[0, 0, 1,  ..., 0, 1, 0],
         [1, 0, 0,  ..., 0, 0, 1],
         [1, 0, 0,  ..., 1, 1, 1],
         ...,
         [0, 0, 0,  ..., 0, 1, 1],
         [1, 1, 0,  ..., 0, 1, 0],
         [1, 1, 0,  ..., 1, 1, 0]], dtype=torch.uint8),
 tensor([[1, 1, 0,  ..., 0, 0, 1],
         [1, 1, 0,  ..., 1, 1, 0],
         [1, 1, 0,  ..., 1, 1, 0],
         ...,
         [1, 1, 1,  ..., 0, 0, 1],
         [1, 1, 1,  ..., 1, 1, 1],
         [0, 1, 0,  ..., 1, 0, 0]], dtype=torch.uint8))

In [28]:
np.linalg.norm(acu>0 - (ac>0)),np.linalg.norm(acu1>0 - (ac1>0))

(177.01412373028316, 172.41519654601214)

In [40]:
print(np.linalg.norm(ac>0))
print(np.linalg.norm(ace>0))
print(np.linalg.norm(acu>0))
print(np.linalg.norm(acue>0))

142.9300528230505
130.330349496961
145.5163221085525
123.89511693363868


In [47]:
print(np.linalg.norm( (ac>0) ^ (ace>0)))
print(np.linalg.norm( (acu>0) ^ (acue>0)))
print(np.linalg.norm( (ac>0) ^ (acu>0)))
print(np.linalg.norm( (ace>0) ^ (acue>0)))
# print(np.linalg.norm(acu>0-acue>0))
# print(np.linalg.norm(ac>0 - acu>0))
# print(np.linalg.norm(ace>0 - acue>0))

105.0380883299006
122.81286577553672
145.13442045221387
137.8695035169127


In [46]:
z=(ac>0) ^ (ace>0)
print(z.shape)
print(z[0])

(80, 512)
[ True False False  True False False  True  True False False  True False
  True  True False False  True  True  True  True False False False False
  True False False False False False False False  True False  True False
 False False False  True False False  True  True False False  True  True
  True  True False False False False False False False False False False
 False  True False  True False False False False False  True False False
  True False  True False False False False False  True  True False  True
  True False False False False False  True  True False  True  True False
  True False False False False False  True False False False False  True
 False False  True False  True False  True False False False False False
 False  True False False False False False False False  True False  True
 False  True False False False False False  True False  True False  True
  True  True  True False False False  True False False False False False
 False False  True False False False Fals