In [1]:
#!/usr/bin/env python

import torch
from torch import nn
from torch import optim
from torch import Tensor
from torch.nn import functional as F
import dlc_practical_prologue as prologue
import matplotlib.pyplot as plt
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
%matplotlib notebook

In [2]:
##########################################à
#CONTENT
# in the first part we train a net w. 1 conv net to learn the digits of 14x14 pics
# in the second part we train a net w. 2 conv net to learn the digits of 14x14 pics
# in the third part we train the standard con nn to learn mnist
# sample data are always normalized and use one hot label encoding

In [3]:
#generate_data
mnist_train_set = MNIST('./data/mnist/', train = True, download = False)
mnist_test_set =  MNIST('./data/mnist/', train = False, download = False)

N = 10**3
train_input,train_target,train_classes,test_input,test_target,test_classes=prologue.generate_pair_sets(N)

In [4]:
train_input.shape

torch.Size([1000, 2, 14, 14])

In [5]:
#convert target to one_hot_label
train_target = prologue.convert_to_one_hot_labels(train_target, train_target)
test_target = prologue.convert_to_one_hot_labels(test_target, test_target)

#convert classes to one_hot_label. Faster way to do it? 
temp1 = prologue.convert_to_one_hot_labels(train_classes[:,0], train_classes[:,0])
temp2 = prologue.convert_to_one_hot_labels(train_classes[:,1], train_classes[:,1])        
train_classes = torch.cat((temp1.unsqueeze(2), temp2.unsqueeze(2)), dim = 2)
train_classes = torch.transpose(train_classes, 1, 2)

temp1 = prologue.convert_to_one_hot_labels(test_classes[:,0], test_classes[:,0])
temp2 = prologue.convert_to_one_hot_labels(test_classes[:,1], test_classes[:,1])        
test_classes = torch.cat((temp1.unsqueeze(2), temp2.unsqueeze(2)), dim = 2)
test_classes = torch.transpose(test_classes, 1, 2)

test_classes.shape, train_classes.shape

(torch.Size([1000, 2, 10]), torch.Size([1000, 2, 10]))

In [6]:
#just a (visual) check
index = torch.randint(1000, (1,)).item()
fig = plt.figure()
fig.add_subplot(1, 2, 1)
plt.imshow(train_input[index][0])
fig.add_subplot(1, 2, 2)
plt.imshow(train_input[index][1])
print('classes')
print(train_classes[index][0])
print(train_classes[index][1])
print('target')
print(train_target[index])

<IPython.core.display.Javascript object>

classes
tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])
target
tensor([0, 1])


In [7]:
#convert to float
train_input = train_input.float()
train_target = train_target.float()

test_input = test_input.float()
test_target = test_target.float()

train_classes = train_classes.float()
test_classes = test_classes.float()

In [8]:
#taking just the first digit picture and using classes as target
train_input = train_input[:,0,:,:]
test_input = test_input[:,0,:,:]

train_input = train_input.unsqueeze(1)
test_input = test_input.unsqueeze(1)

train_target = train_classes[:,0]
test_target = test_classes[:,0]

In [9]:
#Claim: the data is not normalized
print('media train {:0.2f}'.format(train_input.mean()), '; std train {:0.2f}'.format( train_input.std()))
print('media test {:0.2f}'.format(test_input.mean()), '; std test {:0.2f}'.format( test_input.std()))

media train 33.33 ; std train 71.40
media test 33.42 ; std test 71.51


In [10]:
#normalize the data and check
mu, std = train_input.mean(), train_input.std()
train_input = train_input.sub_(mu).div_(std)
test_input = test_input.sub_(mu).div_(std)

print('media train {:0.2f}'.format(train_input.mean()), '; std train {:0.2f}'.format( train_input.std()))
print('media test {:0.2f}'.format(test_input.mean()), '; std test {:0.2f}'.format( test_input.std()))

media train 0.00 ; std train 1.00
media test 0.00 ; std test 1.00


In [11]:
#Base functions adapted from the practicals
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        #self.conv2 = nn.Conv2d(32, 64, kernel_size=2)
        self.fc1 = nn.Linear(512, 60)
        self.fc2 = nn.Linear(60, 10)
    
    def n_params(self):
        n = 0
        for params in self.parameters():
            n += params.numel()
        return n

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        #x = self.conv2(x)
        #x = F.relu(F.max_pool2d(x, kernel_size=3, stride=3))
        x = F.relu(self.fc1(x.view(-1, 512)))
        x = F.softmax(self.fc2(x), dim=1)
        return x

In [12]:
#define training function
def train_model(model, train_input, train_target, mini_batch_size, nb_epochs):
    criterion = nn.MSELoss()
    eta = 1e-3
    
    optimizer = torch.optim.Adam(model.parameters(), lr=eta)

    for e in range( max(nb_epochs) ):
        acc_loss = 0
        
        for b in range(0, train_input.size(0), mini_batch_size):
            optimizer.zero_grad()
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size) )
            acc_loss = acc_loss + loss.item()
            loss.backward()
            optimizer.step()

        if (e+1 == nb_epochs).any(): 
            #print the number of epochs used and the loss achieved
            print(e, acc_loss)

            #compute the test number of errors
            nb_test_errors = compute_nb_errors(model, test_input, test_target, mini_batch_size)
            print(e, 'epochs, test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                              nb_test_errors, test_input.size(0)))

In [13]:
#define compute error function
def compute_nb_errors(model, input, target, mini_batch_size):
    nb_errors = 0

    _, true_class = target.max(1)
    for b in range(0, input.size(0), mini_batch_size):
        output = model(input.narrow(0, b, mini_batch_size))
        _, predicted_classes = output.max(1)
        for k in range(mini_batch_size):
            if true_class[b + k]!=predicted_classes[k]:
                nb_errors = nb_errors + 1

    return nb_errors

In [14]:
#initialize model w batch size and epochs
mini_batch_size = 100
nb_epochs = torch.tensor([100, 200, 250, 300, 350, 400, 450, 500])
models = Net()

In [15]:
#train model
train_model(models, train_input, train_target, mini_batch_size, nb_epochs)
print(models.n_params())

99 0.002429753018077463
99 epochs, test error Net 5.10% 51/1000
199 0.001641185043808946
199 epochs, test error Net 5.30% 53/1000
249 0.0019040549054807343
249 epochs, test error Net 4.90% 49/1000
299 0.0013722052614184577
299 epochs, test error Net 4.10% 41/1000
349 0.00193407408312396
349 epochs, test error Net 5.50% 55/1000
399 0.0020551018315018155
399 epochs, test error Net 4.90% 49/1000
449 0.0015028273061261643
449 epochs, test error Net 5.00% 50/1000
499 0.001353281007084206
499 epochs, test error Net 5.30% 53/1000
31710


In [16]:
#test error
#for i in range(5):
#    nb_test_errors = compute_nb_errors(models[i], test_input, test_target, mini_batch_size)
#    print('With ', nb_epochs[i], 'epochs, test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
#                                                      nb_test_errors, test_input.size(0)))

In [17]:
#shuffle data
permuted_index = torch.randperm(train_input.size()[0])
train_input_shuffled = train_input[permuted_index].detach()
train_target_shuffled = train_target[permuted_index].detach()

In [18]:
(train_input == train_input_shuffled).all(), (train_target == train_target_shuffled).all()

(tensor(False), tensor(False))

In [19]:
#just a (visual) check
index = torch.randint(1000, (1,)).item()
fig = plt.figure()
plt.imshow(train_input_shuffled[index][0])
print('target')
print(train_target_shuffled[index])

<IPython.core.display.Javascript object>

target
tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.])


In [20]:
#redefine model and train
#initialize model w batch size and epochs
mini_batch_size = 100
nb_epochs = torch.tensor([100, 200, 250, 300, 350, 400, 450, 500])
models = Net()

In [21]:
#train model
train_model(models, train_input, train_target, mini_batch_size, nb_epochs)
print(models.n_params())

99 0.0021065128021291457
99 epochs, test error Net 5.90% 59/1000
199 0.0013858194588465267
199 epochs, test error Net 6.00% 60/1000
249 0.0018092022228302085
249 epochs, test error Net 6.60% 66/1000
299 0.0012937433193656034
299 epochs, test error Net 5.30% 53/1000
349 0.001275767120432647
349 epochs, test error Net 5.10% 51/1000
399 0.0012735592045487465
399 epochs, test error Net 5.20% 52/1000
449 0.002099187670921765
449 epochs, test error Net 5.30% 53/1000
499 0.0012650069777464523
499 epochs, test error Net 5.70% 57/1000
31710


In [22]:
#test error
#for i in range(5):
#    nb_test_errors = compute_nb_errors(models2[i], test_input, test_target, mini_batch_size)
#    print('With', nb_epochs[i],'epochs test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
#                                                      nb_test_errors, test_input.size(0)))

In [23]:
#########################################################################################################
#  3 convolutional nn
#########################################################################################################

In [24]:
#Base functions adapted from the practicals
class Net2Conv(nn.Module):
    def __init__(self, ch_1, ch_2, ch_3, hid_1):
        super().__init__()
        self.ch_1 = ch_1
        self.ch_2 = ch_2
        self.ch_3 = ch_3
        self.hid_1 = hid_1
        
        self.conv1 = nn.Conv2d( 1, ch_1, kernel_size=3)
        self.conv2 = nn.Conv2d(ch_1, ch_2, kernel_size=3)
        self.conv3 = nn.Conv2d(ch_2, ch_3, kernel_size=3)
        self.fc1 = nn.Linear(ch_3*16, hid_1)
        self.fc2 = nn.Linear(hid_1, 10)
    
    def n_params(self):
        n = 0
        for params in self.parameters():
            n += params.numel()
        return n

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, kernel_size = 2, stride = 2)
        x = F.relu(self.fc1(x.view(-1,ch_3*16)))
        x = F.softmax(self.fc2(x), dim=1)
        return x

In [25]:
#initialize model w batch size and epochs
mini_batch_size = 100
nb_epochs = torch.tensor([100, 200, 250, 300, 350, 400, 450, 500])

ch_1 = 10
ch_2 = 20
ch_3 = 30
hid_1 = 60

models = Net2Conv(ch_1, ch_2, ch_3, hid_1)

In [26]:
#train model
train_model(models, train_input, train_target, mini_batch_size, nb_epochs)
print(models.n_params())

99 0.014308891204564134
99 epochs, test error Net 7.20% 72/1000
199 0.0071821153642304125
199 epochs, test error Net 5.20% 52/1000
249 0.007969218270773126
249 epochs, test error Net 5.70% 57/1000
299 0.006483191982169956
299 epochs, test error Net 5.60% 56/1000
349 0.0038833183842825747
349 epochs, test error Net 4.70% 47/1000
399 0.0032785685011731402
399 epochs, test error Net 5.00% 50/1000
449 0.002011676188033107
449 epochs, test error Net 4.40% 44/1000
499 0.0019995726815693615
499 epochs, test error Net 4.60% 46/1000
36820


In [27]:
#test error:
#    nb_test_errors = compute_nb_errors(models[i], test_input, test_target, mini_batch_size)
#    print(nb_epochs[i], 'epochs, test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
#                                                      nb_test_errors, test_input.size(0)))

In [28]:
#do it again on shuffled data 
#initialize model w batch size and epochs
mini_batch_size = 100
nb_epochs = torch.tensor([100, 200, 250, 300, 350, 400, 450, 500])

ch_1 = 10
ch_2 = 20
ch_3 = 30
hid_1 = 60

models = Net2Conv(ch_1, ch_2, ch_3, hid_1) 
#training
train_model(models, train_input, train_target, mini_batch_size, nb_epochs)
print(models.n_params())

99 0.007039430760414689
99 epochs, test error Net 7.20% 72/1000
199 0.004517496816220046
199 epochs, test error Net 6.10% 61/1000
249 0.0028333917518921226
249 epochs, test error Net 5.90% 59/1000
299 0.0036604077561150916
299 epochs, test error Net 6.80% 68/1000
349 0.003931828717668395
349 epochs, test error Net 6.40% 64/1000
399 0.002711714977635893
399 epochs, test error Net 6.00% 60/1000
449 0.0028972869811241253
449 epochs, test error Net 5.90% 59/1000
499 0.034259089319675695
499 epochs, test error Net 8.80% 88/1000
36820


In [29]:
################################################################################################################
# 2 convolutional nn 
################################################################################################################

In [50]:
#Base functions adapted from the practicals
class Net2Conv(nn.Module):
    def __init__(self, ch_1, ch_2, ch_3, hid_1):
        super().__init__()
        self.ch_1 = ch_1
        self.ch_2 = ch_2
        self.hid_1 = hid_1
        
        self.conv1 = nn.Conv2d( 1, ch_1, kernel_size=3)
        self.conv2 = nn.Conv2d(ch_1, ch_2, kernel_size=3)
        self.fc1 = nn.Linear(ch_2*25, hid_1)
        self.fc2 = nn.Linear(hid_1, 10)
        
    def n_params(self):
        n = 0
        for params in self.parameters():
            n += params.numel()
        return n

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size = 2, stride = 2)
        x = F.relu(self.fc1(x.view(-1,ch_2*25)))
        x = F.softmax(self.fc2(x), dim=1)
        return x

In [51]:
#initialize model w batch size and epochs
mini_batch_size = 100
nb_epochs = torch.tensor([25, 100, 200, 250, 300, 350, 400, 450, 500])

ch_1 = 30
ch_2 = 30
ch_3 = 0
hid_1 = 100
model = Net2Conv(ch_1, ch_2, ch_3, hid_1)

In [52]:
#train model
train_model(model, train_input, train_target, mini_batch_size, nb_epochs)
print(model.n_params())

24 0.013534654601244256
24 epochs, test error Net 6.20% 62/1000
99 0.0018501409504096955
99 epochs, test error Net 5.50% 55/1000
199 0.0016209470638841594
199 epochs, test error Net 5.00% 50/1000
249 0.012515222493675537
249 epochs, test error Net 6.10% 61/1000
299 0.0020062339449680167
299 epochs, test error Net 5.80% 58/1000
349 0.002004011332104483
349 epochs, test error Net 5.20% 52/1000
399 0.002002702331907358
399 epochs, test error Net 5.20% 52/1000
449 0.0020011273658298023
449 epochs, test error Net 4.90% 49/1000
499 0.0020012183859385857
499 epochs, test error Net 5.20% 52/1000
84540


In [53]:
#test error:
#    nb_test_errors = compute_nb_errors(models[i], test_input, test_target, mini_batch_size)
#    print(nb_epochs[i], 'epochs, test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
#                                                      nb_test_errors, test_input.size(0)))

In [54]:
#do it again on shuffled data 
#initialize model w batch size and epochs
mini_batch_size = 100
nb_epochs = torch.tensor([25, 100, 200, 250, 300, 350, 400, 450, 500])

ch_1 = 10
ch_2 = 20
ch_3 = 30
hid_1 = 60

models = Net2Conv(ch_1, ch_2, ch_3, hid_1) 
#training
train_model(models, train_input, train_target, mini_batch_size, nb_epochs)
print(model.n_params())

24 0.04446752765215933
24 epochs, test error Net 8.10% 81/1000
99 0.006553927045388264
99 epochs, test error Net 6.70% 67/1000
199 0.004852048318753077
199 epochs, test error Net 5.70% 57/1000
249 0.00129150214843321
249 epochs, test error Net 5.90% 59/1000
299 0.0019624500866939343
299 epochs, test error Net 6.10% 61/1000
349 0.0015092058888512838
349 epochs, test error Net 5.50% 55/1000
399 0.0022002888847509894
399 epochs, test error Net 6.00% 60/1000
449 0.0019998207929461387
449 epochs, test error Net 5.60% 56/1000
499 0.0019903213251097895
499 epochs, test error Net 5.40% 54/1000
84540


In [35]:
################################################################################################################
# 2d convolutional nn without maxpool and convolutions 2*2. Makes sense from the pictures
################################################################################################################

In [62]:
#Base functions adapted from the practicals
class Net2Conv(nn.Module):
    def __init__(self, ch_1, ch_2, ch_3, hid_1):
        super().__init__()
        self.ch_1 = ch_1
        self.ch_2 = ch_2
        self.hid_1 = hid_1
        
        self.conv1 = nn.Conv2d( 1, ch_1, kernel_size=2)
        self.conv2 = nn.Conv2d(ch_1, ch_2, kernel_size=2)
        self.fc1 = nn.Linear(ch_2*6*6, hid_1)
        self.fc2 = nn.Linear(hid_1, 10)
    
    def n_params(self):
        n = 0
        for params in self.parameters():
            n += params.numel()
        return n

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size = 2, stride = 2)
        x = F.relu(self.fc1(x.view(-1,ch_2*6*6)))
        x = F.softmax(self.fc2(x), dim=1)
        return x

In [63]:
#initialize model w batch size and epochs
mini_batch_size = 100
nb_epochs = torch.tensor([25, 100, 200, 250, 300, 350, 400, 450, 500])

ch_1 = 10
ch_2 = 20
ch_3 = 30
hid_1 = 60
model = Net2Conv(ch_1, ch_2, ch_3, hid_1)

In [65]:
#train model
train_model(model, train_input, train_target, mini_batch_size, nb_epochs)
print(model.n_params())

24 0.005564660910295061
24 epochs, test error Net 6.80% 68/1000
99 0.02876026090234518
99 epochs, test error Net 8.40% 84/1000
199 0.002371473134076041
199 epochs, test error Net 6.00% 60/1000
249 0.0023766043247093194
249 epochs, test error Net 5.90% 59/1000
299 0.004006880126610213
299 epochs, test error Net 7.70% 77/1000
349 0.0017714247260869342
349 epochs, test error Net 6.90% 69/1000
399 0.001977816725755588
399 epochs, test error Net 6.30% 63/1000
449 0.0012974018774798424
449 epochs, test error Net 5.70% 57/1000
499 0.0011702952042327297
499 epochs, test error Net 5.30% 53/1000
44740


In [66]:
#test error:
#    nb_test_errors = compute_nb_errors(models[i], test_input, test_target, mini_batch_size)
#    print(nb_epochs[i], 'epochs, test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
#                                                      nb_test_errors, test_input.size(0)))

In [67]:
#do it again on shuffled data 
#initialize model w batch size and epochs
mini_batch_size = 100
nb_epochs = torch.tensor([25, 100, 200, 250, 300, 350, 400, 450, 500])

ch_1 = 10
ch_2 = 20
ch_3 = 30
hid_1 = 60

models = Net2Conv(ch_1, ch_2, ch_3, hid_1)
#training
train_model(models, train_input, train_target, mini_batch_size, nb_epochs)
print(models.n_params())

24 0.04375008586794138
24 epochs, test error Net 9.50% 95/1000
99 0.01391847018385306
99 epochs, test error Net 8.20% 82/1000
199 0.004741497222312319
199 epochs, test error Net 6.80% 68/1000
249 0.0024465869691994158
249 epochs, test error Net 6.60% 66/1000
299 0.003620055089413654
299 epochs, test error Net 7.00% 70/1000
349 0.003919987973631578
349 epochs, test error Net 6.30% 63/1000
399 0.003532110627759266
399 epochs, test error Net 7.60% 76/1000
449 0.0037174319809878398
449 epochs, test error Net 6.70% 67/1000
499 0.004001532537586172
499 epochs, test error Net 6.60% 66/1000
44740
