In [9]:
import torch
import torch.nn.functional as F
from torchvision import datasets, transforms

# GET THE TRAINING DATASET
train_data = datasets.MNIST(
    root='MNIST-data',                        # where is the data (going to be) stored
    transform=transforms.ToTensor(),          # transform the data from a PIL image to a tensor
    train=True,                               # is this training data?
    download=True                             # should i download it if it's not already here?
)

# GET THE TEST DATASET
test_data = datasets.MNIST(
    root='MNIST-data', # where is the data (going to be) stored
    transform=transforms.ToTensor(), # transform the data from a PIL image to a tensor
    train=False,    # this is not the training set
)

# PRINT THEIR LENGTHS AND VISUALISE AN EXAMPLE
ex = train_data[0] # get the first example
x = ex[0] # get the features (actual tensor data -the first thing in the example)
y = ex[1] # get the labels (second thing in the example)
print('Features:', x)
print('Label:', y)
t = transforms.ToPILImage() # create the transform that can be called to convert the tensor into a PIL Image
img = t(x)    # call the transform on the tensor
#img.show()    # show the image

Features: tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.000

In [10]:
# FURTHER SPLIT THE TRAINING INTO TRAINING AND VALIDATION
train_data, val_data = torch.utils.data.random_split(train_data, [50000, 10000])    # split into 50K training & 10K validation

In [11]:
batch_size = 256

# MAKE TRAINING DATALOADER
train_loader = torch.utils.data.DataLoader( # create a data loader
    train_data, # what dataset should it sample from?
    shuffle=True, # should it shuffle the examples?
    batch_size=batch_size # how large should the batches that it samples be?
)

# MAKE VALIDATION DATALOADER
val_loader = torch.utils.data.DataLoader(
    val_data,
    shuffle=True,
    batch_size=batch_size
)

# MAKE TEST DATALOADER
test_loader = torch.utils.data.DataLoader(
    test_data,
    shuffle=True,
    batch_size=batch_size
)

In [69]:
import random
class OGNN(torch.nn.Module): # create a neural network class
    def __init__(self, originators=2): # initialiser
        super().__init__() # initialise the parent class
        self.l1_originators = torch.nn.ModuleList([torch.nn.Linear(784, 1024) for i in range(originators)])
        self.l2_originators = torch.nn.ModuleList([torch.nn.Linear(1024, 256) for i in range(originators)])
        self.l3_originators = torch.nn.ModuleList([torch.nn.Linear(256, 10) for i in range(originators)])
        
    def forward(self, x): # define the forward pass
        x = x.view(-1, 784) # flatten out our image features into vectors
        x = F.relu(random.choice(self.l1_originators)(x)) # pass through the first linear layer
        x = F.relu(random.choice(self.l2_originators)(x)) # pass through the first linear layer
        x = F.softmax(random.choice(self.l3_originators)(x), dim=1) # pass through the first linear layer
        return x # return output
    
    #def predict(self, x):

class NN(torch.nn.Module): # create a neural network class
    def __init__(self): # initialiser
        super().__init__() # initialise the parent class
        self.layer1 = torch.nn.Linear(784, 1024) # create our first linear layer
        self.layer2 = torch.nn.Linear(1024, 256) # create our second linear layer
        self.layer3 = torch.nn.Linear(256, 10) # create our third linear layer
        
    def forward(self, x): # define the forward pass
        x = x.view(-1, 784) # flatten out our image features into vectors
        x = self.layer1(x) # pass through the first linear layer
        x = F.relu(x) # apply activation function
        x = self.layer2(x) # pass through the second linear layer
        x = F.relu(x) # apply activation function
        x = self.layer3(x) # pass through the third linear layer
        x = F.softmax(x) # apply activation function
        return x # return output

In [89]:
F.mse_loss(my_nn.l1_originators[0].weight, my_nn.l1_originators[1].weight)

tensor(0.0008, grad_fn=<MeanBackward0>)

In [97]:
my_nn = OGNN(originators=2) # initialise our model

# CREATE OUR OPTIMISER
optimiser = torch.optim.Adam(              # what optimiser should we use?
    my_nn.parameters(),          # what should it optimise?
)
        
# CREATE OUR CRITERION
criterion = torch.nn.CrossEntropyLoss() # returns a callable object that compares our predictions to our labels and returns our loss

# SET UP TRAINING VISUALISATION
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter() # we will use this to show our models performance on a graph
    
# TRAINING LOOP
def train(model, epochs):
    for epoch in range(epochs):
        for idx, minibatch in enumerate(train_loader): # for each mini-batch sampled from the training dataloader
            inputs, labels = minibatch # unpack the inputs and labels from the minibatch
            prediction = model(inputs) # pass the data forward through the model
            reg_loss = 100*torch.sum(torch.Tensor([F.mse_loss(my_nn.l1_originators[0].weight, my_nn.l1_originators[1].weight), F.mse_loss(my_nn.l2_originators[0].weight, my_nn.l2_originators[1].weight), F.mse_loss(my_nn.l3_originators[0].weight, my_nn.l3_originators[1].weight)]))# compute the loss
            loss = criterion(prediction, labels)# compute the loss
            total_loss = reg_loss+loss
            print('Epoch:', epoch, '\tBatch:', idx, '\tLoss:', loss)
            optimiser.zero_grad() # reset the gradients attribute of each of the model's params to zero
            total_loss.backward() # backward pass to compute and set all of the model param's gradients
            optimiser.step() # update the model's parameters
            writer.add_scalar('Loss/Train', loss, epoch*len(train_loader) + idx) # write loss to a graph
            
            
train(my_nn, 8) # train for 10 epochs

Epoch: 0 	Batch: 0 	Loss: tensor(2.3025, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 1 	Loss: tensor(2.3029, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 2 	Loss: tensor(2.3024, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 3 	Loss: tensor(2.2833, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 4 	Loss: tensor(2.3016, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 5 	Loss: tensor(2.3022, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 6 	Loss: tensor(2.3018, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 7 	Loss: tensor(2.2919, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 8 	Loss: tensor(2.2787, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 9 	Loss: tensor(2.2845, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 10 	Loss: tensor(2.2683, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 11 	Loss: tensor(2.2779, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 12 	Loss: tensor(2.2083, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 13 	Loss: tensor(2.2283, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 14 	Loss: tensor(2.1539, gr

Epoch: 0 	Batch: 119 	Loss: tensor(1.6277, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 120 	Loss: tensor(1.6323, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 121 	Loss: tensor(1.6460, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 122 	Loss: tensor(1.6888, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 123 	Loss: tensor(1.6777, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 124 	Loss: tensor(1.6349, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 125 	Loss: tensor(1.6658, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 126 	Loss: tensor(1.6607, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 127 	Loss: tensor(1.5923, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 128 	Loss: tensor(1.6190, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 129 	Loss: tensor(1.6185, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 130 	Loss: tensor(1.6441, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 131 	Loss: tensor(1.6542, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 132 	Loss: tensor(1.5930, grad_fn=<NllLossBackward>)
Epoch: 0 	Batch: 133

Epoch: 1 	Batch: 44 	Loss: tensor(1.5553, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 45 	Loss: tensor(1.5710, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 46 	Loss: tensor(1.5942, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 47 	Loss: tensor(1.5700, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 48 	Loss: tensor(1.5643, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 49 	Loss: tensor(1.5875, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 50 	Loss: tensor(1.5323, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 51 	Loss: tensor(1.5974, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 52 	Loss: tensor(1.5416, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 53 	Loss: tensor(1.5768, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 54 	Loss: tensor(1.5298, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 55 	Loss: tensor(1.5782, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 56 	Loss: tensor(1.5319, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 57 	Loss: tensor(1.5588, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 58 	Loss: tensor(

Epoch: 1 	Batch: 167 	Loss: tensor(1.5672, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 168 	Loss: tensor(1.5294, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 169 	Loss: tensor(1.5788, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 170 	Loss: tensor(1.5609, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 171 	Loss: tensor(1.5339, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 172 	Loss: tensor(1.5299, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 173 	Loss: tensor(1.5623, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 174 	Loss: tensor(1.5682, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 175 	Loss: tensor(1.5260, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 176 	Loss: tensor(1.5423, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 177 	Loss: tensor(1.5470, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 178 	Loss: tensor(1.5481, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 179 	Loss: tensor(1.5499, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 180 	Loss: tensor(1.5316, grad_fn=<NllLossBackward>)
Epoch: 1 	Batch: 181

Epoch: 2 	Batch: 93 	Loss: tensor(1.5264, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 94 	Loss: tensor(1.5254, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 95 	Loss: tensor(1.5022, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 96 	Loss: tensor(1.5175, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 97 	Loss: tensor(1.5435, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 98 	Loss: tensor(1.5301, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 99 	Loss: tensor(1.5344, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 100 	Loss: tensor(1.5151, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 101 	Loss: tensor(1.5215, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 102 	Loss: tensor(1.5281, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 103 	Loss: tensor(1.5430, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 104 	Loss: tensor(1.5123, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 105 	Loss: tensor(1.5556, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 106 	Loss: tensor(1.5593, grad_fn=<NllLossBackward>)
Epoch: 2 	Batch: 107 	Loss:

Epoch: 3 	Batch: 18 	Loss: tensor(1.5250, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 19 	Loss: tensor(1.5110, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 20 	Loss: tensor(1.5106, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 21 	Loss: tensor(1.5247, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 22 	Loss: tensor(1.5122, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 23 	Loss: tensor(1.5413, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 24 	Loss: tensor(1.5213, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 25 	Loss: tensor(1.5034, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 26 	Loss: tensor(1.5097, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 27 	Loss: tensor(1.5147, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 28 	Loss: tensor(1.5286, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 29 	Loss: tensor(1.5350, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 30 	Loss: tensor(1.5044, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 31 	Loss: tensor(1.5251, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 32 	Loss: tensor(

Epoch: 3 	Batch: 140 	Loss: tensor(1.5677, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 141 	Loss: tensor(1.4860, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 142 	Loss: tensor(1.4953, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 143 	Loss: tensor(1.5103, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 144 	Loss: tensor(1.5129, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 145 	Loss: tensor(1.4886, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 146 	Loss: tensor(1.5097, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 147 	Loss: tensor(1.5266, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 148 	Loss: tensor(1.4940, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 149 	Loss: tensor(1.5156, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 150 	Loss: tensor(1.4860, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 151 	Loss: tensor(1.5351, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 152 	Loss: tensor(1.4993, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 153 	Loss: tensor(1.5065, grad_fn=<NllLossBackward>)
Epoch: 3 	Batch: 154

Epoch: 4 	Batch: 66 	Loss: tensor(1.5026, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 67 	Loss: tensor(1.5157, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 68 	Loss: tensor(1.4900, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 69 	Loss: tensor(1.5069, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 70 	Loss: tensor(1.5096, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 71 	Loss: tensor(1.5268, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 72 	Loss: tensor(1.5133, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 73 	Loss: tensor(1.5086, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 74 	Loss: tensor(1.5288, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 75 	Loss: tensor(1.4961, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 76 	Loss: tensor(1.4829, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 77 	Loss: tensor(1.5205, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 78 	Loss: tensor(1.5343, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 79 	Loss: tensor(1.5066, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 80 	Loss: tensor(

Epoch: 4 	Batch: 188 	Loss: tensor(1.5151, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 189 	Loss: tensor(1.5226, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 190 	Loss: tensor(1.5007, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 191 	Loss: tensor(1.5004, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 192 	Loss: tensor(1.5241, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 193 	Loss: tensor(1.5077, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 194 	Loss: tensor(1.5047, grad_fn=<NllLossBackward>)
Epoch: 4 	Batch: 195 	Loss: tensor(1.5043, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 0 	Loss: tensor(1.5077, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 1 	Loss: tensor(1.5133, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 2 	Loss: tensor(1.4999, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 3 	Loss: tensor(1.4917, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 4 	Loss: tensor(1.5118, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 5 	Loss: tensor(1.5338, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 6 	Loss: tensor

Epoch: 5 	Batch: 116 	Loss: tensor(1.4945, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 117 	Loss: tensor(1.5127, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 118 	Loss: tensor(1.4944, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 119 	Loss: tensor(1.5140, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 120 	Loss: tensor(1.4982, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 121 	Loss: tensor(1.4900, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 122 	Loss: tensor(1.4839, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 123 	Loss: tensor(1.5126, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 124 	Loss: tensor(1.5115, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 125 	Loss: tensor(1.4853, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 126 	Loss: tensor(1.4900, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 127 	Loss: tensor(1.4850, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 128 	Loss: tensor(1.4944, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 129 	Loss: tensor(1.5018, grad_fn=<NllLossBackward>)
Epoch: 5 	Batch: 130

Epoch: 6 	Batch: 40 	Loss: tensor(1.4983, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 41 	Loss: tensor(1.5005, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 42 	Loss: tensor(1.4944, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 43 	Loss: tensor(1.5016, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 44 	Loss: tensor(1.5282, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 45 	Loss: tensor(1.4845, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 46 	Loss: tensor(1.4811, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 47 	Loss: tensor(1.5263, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 48 	Loss: tensor(1.4985, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 49 	Loss: tensor(1.4895, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 50 	Loss: tensor(1.4974, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 51 	Loss: tensor(1.4881, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 52 	Loss: tensor(1.5017, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 53 	Loss: tensor(1.4763, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 54 	Loss: tensor(

Epoch: 6 	Batch: 159 	Loss: tensor(1.5017, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 160 	Loss: tensor(1.4897, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 161 	Loss: tensor(1.5050, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 162 	Loss: tensor(1.4996, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 163 	Loss: tensor(1.5124, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 164 	Loss: tensor(1.4924, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 165 	Loss: tensor(1.4955, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 166 	Loss: tensor(1.5078, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 167 	Loss: tensor(1.4813, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 168 	Loss: tensor(1.4912, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 169 	Loss: tensor(1.4911, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 170 	Loss: tensor(1.4981, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 171 	Loss: tensor(1.5071, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 172 	Loss: tensor(1.4992, grad_fn=<NllLossBackward>)
Epoch: 6 	Batch: 173

Epoch: 7 	Batch: 86 	Loss: tensor(1.4943, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 87 	Loss: tensor(1.4872, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 88 	Loss: tensor(1.4806, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 89 	Loss: tensor(1.5012, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 90 	Loss: tensor(1.4962, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 91 	Loss: tensor(1.4779, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 92 	Loss: tensor(1.4883, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 93 	Loss: tensor(1.5118, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 94 	Loss: tensor(1.4990, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 95 	Loss: tensor(1.5104, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 96 	Loss: tensor(1.4969, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 97 	Loss: tensor(1.4936, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 98 	Loss: tensor(1.4920, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 99 	Loss: tensor(1.4892, grad_fn=<NllLossBackward>)
Epoch: 7 	Batch: 100 	Loss: tensor

In [94]:
import numpy as np
            
def test(model):
    num_correct = 0
    num_examples = len(test_data) # test DATA not test LOADER
    for inputs, labels in test_loader: # for all exampls, over all mini-batches in the test dataset
        predictions = model(inputs) # make prediction
        predictions = torch.max(predictions, axis=1) # reduce to find max indices along direction which column varies
        predictions = predictions[1] # torch.max returns (values, indices)
        num_correct += int(sum(predictions == labels))
    percent_correct = num_correct / num_examples * 100 # compute percentage
    print('Accuracy:', percent_correct)
    
test(my_nn)

Accuracy: 96.61


In [96]:
diffs_interlayer = [
    F.mse_loss(my_nn.l1_originators[0].weight, my_nn.l1_originators[1].weight),
    F.mse_loss(my_nn.l2_originators[0].weight, my_nn.l2_originators[1].weight),
    F.mse_loss(my_nn.l3_originators[0].weight, my_nn.l3_originators[1].weight)
]
diffs
#

[tensor(0.0023, grad_fn=<MeanBackward0>),
 tensor(0.0013, grad_fn=<MeanBackward0>),
 tensor(0.0020, grad_fn=<MeanBackward0>)]

In [66]:
diffs_interlayer = [
    np.mean([F.mse_loss(list(my_nn.parameters())[0], list(my_nn.parameters())[4]), F.mse_loss(list(my_nn.parameters())[0], list(my_nn.parameters())[6]), F.mse_loss(list(my_nn.parameters())[2], list(my_nn.parameters())[4]), F.mse_loss(list(my_nn.parameters())[2], list(my_nn.parameters())[6])]),
    np.mean([F.mse_loss(list(my_nn.parameters())[4], list(my_nn.parameters())[8]), F.mse_loss(list(my_nn.parameters())[4], list(my_nn.parameters())[10]), F.mse_loss(list(my_nn.parameters())[6], list(my_nn.parameters())[8]), F.mse_loss(list(my_nn.parameters())[6], list(my_nn.parameters())[10])]),
    #F.mse_loss(list(my_nn.parameters())[2], list(my_nn.parameters())[4]),
    #F.mse_loss(list(my_nn.parameters())[0], list(my_nn.parameters())[4])
    #F.mse_loss(list(my_nn.parameters())[4], list(my_nn.parameters())[6]),
    #F.mse_loss(list(my_nn.parameters())[8], list(my_nn.parameters())[10]),
]
diffs

  


RuntimeError: The size of tensor a (784) must match the size of tensor b (1024) at non-singleton dimension 1

[tensor(0.0023, grad_fn=<MeanBackward0>),
 tensor(0.0013, grad_fn=<MeanBackward0>),
 tensor(0.0020, grad_fn=<MeanBackward0>)]

In [None]:
my_nn.state_dict()