### Import models and load data

In [6]:
from torchvision import datasets
import torch
import dlc_practical_prologue as prologue
import matplotlib.pyplot as plt

# Import all models
from BaseNet import *
from ConvNet1 import *
#from NetSharing import *

# Shared settings for the framework cells below (1channel2images, 2channels1image,
# weight sharing, auxiliary losses); each of them forwards these values to
# multiple_training_runs.
# Mini-batch size handed to multiple_training_runs.
mini_batch_size = 1000
# Number of epochs handed to multiple_training_runs.
nb_epochs = 300
# Number of repeated runs (used by the weight-sharing and auxiliary-losses cells).
nb_runs = 2
# Learning rate of the 1channel2images SGD optimizer.
eta = 0.001

# Build the six train/test tensors with the course helper module; nb=1000 is the
# requested set size.
train_input, train_target, train_classes, test_input, test_target, test_classes = \
    prologue.generate_pair_sets(nb=1000)



### 1channel2images

In [2]:
from importlib import reload
import _1channel2images
reload(_1channel2images)
from _1channel2images import *

# nb_classes, multiple_training_runs and write_to_csv are whatever the most recent
# framework star import bound (here presumably _1channel2images) -- TODO confirm.
print("Working with 1channel2images framework, nb_classes = ", nb_classes)

# Alternative baseline architecture, kept for quick switching:
#model = BaseNet1C(nb_classes)
model_1C = ConvNet1_1C(nb_classes)
optimizer_1C = torch.optim.SGD(model_1C.parameters(), lr=eta, momentum=0.95)
# Use the shared nb_runs setting instead of a literal run count so this cell stays
# consistent with the weight-sharing and auxiliary-losses cells.
# This framework is given the digit classes for training and both the pair target
# and the digit classes for testing.
test_results_1C = multiple_training_runs(model_1C, nb_runs, optimizer_1C, train_input, train_classes,
                                         test_input, test_target, test_classes, mini_batch_size, nb_epochs)
write_to_csv('1channel2images.csv', model_1C, test_results_1C)

Working with 1channel2images framework, nb_classes =  10


KeyboardInterrupt: 

In [None]:
# Result inspection only: pandas is used to preview the CSV and must be removed for
# the report, where no additional libraries are allowed.
# NOTE(review): `pd` imported here is reused by the later inspection cells.
import pandas as pd

print(test_results_1C)
# NOTE(review): the training cell above already writes this CSV; if write_to_csv
# appends rather than overwrites, this call duplicates rows -- confirm.
write_to_csv('1channel2images.csv', model_1C, test_results_1C)
data = pd.read_csv('1channel2images.csv')
# Last expression of the cell: displays the first rows of the saved results.
data.head()

### 2channels1image

In [None]:
import _2channels1image
reload(_2channels1image)
from _2channels1image import *

# nb_classes, multiple_training_runs and write_to_csv are whatever the most recent
# framework star import bound (here presumably _2channels1image) -- TODO confirm.
print("Working with 2channels1image framework, nb_classes = ", nb_classes)

model_2C = ConvNet1_2C(nb_classes)
optimizer_2C = torch.optim.SGD(model_2C.parameters(), lr=0.00001, momentum=0.95)
# Use the shared nb_runs setting instead of a literal run count so this cell stays
# consistent with the weight-sharing and auxiliary-losses cells.
# This framework is called with the pair targets only (no digit classes).
test_results_2C = multiple_training_runs(model_2C, nb_runs, optimizer_2C, train_input, train_target,
                                         test_input, test_target, mini_batch_size, nb_epochs)
write_to_csv('2channels1image.csv', model_2C, test_results_2C)

In [None]:
# Result inspection for the 2channels1image run; relies on `pd` having been imported
# by the 1channel2images inspection cell.
print(test_results_2C)
# NOTE(review): the training cell above already writes this CSV; if write_to_csv
# appends rather than overwrites, this call duplicates rows -- confirm.
write_to_csv('2channels1image.csv', model_2C, test_results_2C)
data = pd.read_csv('2channels1image.csv')
# Last expression of the cell: displays the first rows of the saved results.
data.head()

### Weight Sharing

In [None]:
import weight_sharing
reload(weight_sharing)
from weight_sharing import *

print("Working with weight_sharing framework")

# NetSharing1, multiple_training_runs and write_to_csv are presumably provided by
# the weight_sharing star import (the NetSharing import at the top of the notebook
# is commented out) -- TODO confirm.
model_ws = NetSharing1()
optimizer_ws = torch.optim.SGD(
    model_ws.parameters(),
    lr=1e-5,
    momentum=0.95,
)
# Trained and evaluated on the pair targets only, nb_runs times.
test_results_ws = multiple_training_runs(
    model_ws, nb_runs, optimizer_ws,
    train_input, train_target,
    test_input, test_target,
    mini_batch_size, nb_epochs,
)
write_to_csv('weightsharing.csv', model_ws, test_results_ws)

In [None]:
# Result inspection for the weight-sharing run; relies on `pd` having been imported
# by the 1channel2images inspection cell.
print(test_results_ws)
# NOTE(review): the training cell above already writes this CSV; if write_to_csv
# appends rather than overwrites, this call duplicates rows -- confirm.
write_to_csv('weightsharing.csv', model_ws, test_results_ws)
data = pd.read_csv('weightsharing.csv')
# Last expression of the cell: displays the first rows of the saved results.
data.head()

### Auxiliary Losses

In [None]:
import auxiliary_losses
reload(auxiliary_losses)
from auxiliary_losses import *
import Incept1
reload(Incept1)
from Incept1 import *
# Same value as the shared setting at the top of the notebook.
# NOTE(review): presumably re-assigned because one of the star imports above may
# rebind nb_epochs -- confirm, otherwise this line is redundant.
nb_epochs = 300
print("Working with auxiliary losses framework")

model_aux = Incept1()
optimizer_aux = torch.optim.SGD(model_aux.parameters(), lr=0.00001, momentum=0.95)
# This framework receives both the pair targets and the digit classes, for training
# as well as for testing.
test_results_aux = multiple_training_runs(model_aux, nb_runs, optimizer_aux, train_input, train_target, train_classes,
                                          test_input, test_target, test_classes, mini_batch_size, nb_epochs)
write_to_csv('auxiliary_losses.csv', model_aux, test_results_aux)

In [None]:
# Result inspection for the auxiliary-losses run; relies on `pd` having been imported
# by the 1channel2images inspection cell.
print(test_results_aux)
# NOTE(review): the training cell above already writes this CSV; if write_to_csv
# appends rather than overwrites, this call duplicates rows -- confirm.
write_to_csv('auxiliary_losses.csv', model_aux, test_results_aux)
data = pd.read_csv('auxiliary_losses.csv')
# Last expression of the cell: displays the first rows of the saved results.
data.head()

## Weight Sharing and Auxiliary Loss Models (Youssef)

In [8]:
# Sanity check: shapes of the three training tensors followed by the three test tensors.
print(
    train_input.shape, train_target.shape, train_classes.shape,
    test_input.shape, test_target.shape, test_classes.shape,
)

torch.Size([1000, 2, 14, 14]) torch.Size([1000]) torch.Size([1000, 2]) torch.Size([1000, 2, 14, 14]) torch.Size([1000]) torch.Size([1000, 2])


In [14]:
from torch.autograd import Variable
from WSharingModel import *
from torch import nn, optim
from torch.nn import CrossEntropyLoss
from torch.nn import BCEWithLogitsLoss
import torch
from torchvision import datasets
from torch import optim
import dlc_practical_prologue as prologue
from AuxModel import *
from torch.autograd import Variable
from torch.nn import functional as F
from time import time


#### Function that draws the evolution of the loss during training.

In [9]:
def visualize_loss(model,loss_model,lr):
    """Plot a loss curve and show the figure.

    Args:
        model: Trained model; only its class name is used, in the title.
        loss_model: Indexable pair where ``loss_model[0]`` holds the x values
            (labelled 'Epochs') and ``loss_model[1]`` the y values (labelled 'Loss').
        lr: Learning rate, shown in the title.
    """
    x_values, y_values = loss_model[0], loss_model[1]
    plt.plot(x_values, y_values, color='orange')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    model_name = model.__class__.__name__
    title = 'Evolution of the loss during training with learning rate {:>5} of the model {:>5}.'.format(lr, model_name)
    plt.title(title)
    plt.show()

#### Function that prints the results of the prediction once the model is trained.

In [15]:
def print_results(model,optimizer,learning_rate,NB_EPOCHS,MINI_BATCH_SIZE, train_input, train_target,test_input, test_target, time_diff):
    """Print a one-line summary of a trained model's settings and error rates.

    Args:
        model: Trained model exposing
            ``compute_nb_errors(model, inputs, targets, batch_size)``.
        optimizer: Optimizer class (e.g. ``torch.optim.SGD``) or optimizer
            instance; only its name is printed.
        learning_rate: Learning rate used for training (printed as-is).
        NB_EPOCHS: Number of training epochs (printed as-is).
        MINI_BATCH_SIZE: Batch size, printed and forwarded to ``compute_nb_errors``.
        train_input, train_target: Training inputs and targets.
        test_input, test_target: Test inputs and targets.
        time_diff: Training duration, printed with three decimals.
    """
    # A class carries __name__; an instance does not, so fall back to its type name
    # instead of raising AttributeError.
    optimizer_name = getattr(optimizer, '__name__', type(optimizer).__name__)

    # Error counts are turned into percentages of the respective set size.
    train_error = model.compute_nb_errors(model, train_input, train_target, MINI_BATCH_SIZE) / train_input.size(0) * 100
    test_error = model.compute_nb_errors(model, test_input, test_target, MINI_BATCH_SIZE) / test_input.size(0) * 100

    print('model: {:6}, optimizer: {:6}, learning rate: {:6}, num epochs: {:3}, '
          'mini batch size: {:3}, train error: {:5.2f}%, test error: {:5.2f}%, time: {:.3f}'.format(
              model.__class__.__name__,
              optimizer_name,
              learning_rate,
              NB_EPOCHS,
              MINI_BATCH_SIZE,
              train_error,
              test_error,
              time_diff
          ))

### Treatment of our Training and Testing Data.

In [11]:
# Standardize both splits with the statistics of the TRAINING inputs only, so the
# test set does not influence the normalization.
# NOTE: sub_/div_ modify the tensors in place, so this cell is not idempotent --
# re-running it normalizes already-normalized data. Re-run the data-loading cell
# first if this cell has to be executed again.
mean, std = train_input.mean(), train_input.std()
# In-place ops return the same tensor; re-binding keeps the cell free of a trailing
# expression (and therefore of a large tensor dump in the output).
train_input = train_input.sub_(mean).div_(std)
test_input = test_input.sub_(mean).div_(std)

# The former torch.autograd.Variable(...) wrappers were dropped: Variable is
# deprecated and simply returns a Tensor, so the targets and classes are used as
# they are.

### We define our Learning parameters.

In [18]:
# Settings read by calculate_results for the weight-sharing / auxiliary-loss sweep.
# Number of epochs passed to train_model_WS / train_model_AM.
NB_EPOCHS = 25
# Batch size passed to the training functions and to compute_nb_errors.
MINI_BATCH_SIZE = 100
# Every model is trained once per learning rate in this list.
learning_rates = [0.1, 0.01]

### We define our optimizers and losses and weights for the auxiliary losses.

In [13]:
# Optimizer class, not an instance: calculate_results instantiates it per model as
# op(model.parameters(), lr=...).
op = torch.optim.SGD
# Only losses[0] (cross-entropy) is passed to the training functions in
# calculate_results; losses[1] is not referenced in the visible cells.
losses = [CrossEntropyLoss(),BCEWithLogitsLoss()]
# Forwarded to train_model_AM; presumably the weights of the auxiliary and main
# loss terms -- TODO confirm against AuxModel.
alpha = 0.5
beta = 1

### Training of Different Models and Results on Training and Testing set.

In [41]:
 def calculate_results(result_dct):
    """Train every model / learning-rate combination once and record its metrics.

    For each of the four model classes (WSModel, WSModel1, AuxModel, AuxModel1)
    and each entry of ``learning_rates``, a fresh model is trained, a summary
    line is printed, and the train error (%), test error (%) and training time
    (seconds) are appended to ``result_dct[k]``. ``k`` counts combinations in
    loop order, i.e. ``k = model_index * len(learning_rates) + lr_index``.

    Args:
        result_dct: Mapping from slot index ``k`` to a dict holding the lists
            'train_error', 'test_error' and 'time'. It is mutated in place.

    Returns:
        The same ``result_dct`` object, with one new value in every list.
    """
    # Classes rather than instances: only the model actually being trained is
    # built, instead of constructing all four models on every iteration.
    ws_model_classes = [WSModel, WSModel1]
    aux_model_classes = [AuxModel, AuxModel1]
    k = 0
    for model_class in ws_model_classes + aux_model_classes:
        for learning_rate in learning_rates:
            model = model_class()
            optimizer = op(model.parameters(), lr=learning_rate)
            # The value returned by the training functions is not used here.
            start_time = time()
            if model_class in ws_model_classes:
                train_model_WS(model, optimizer, train_input, train_target, NB_EPOCHS, MINI_BATCH_SIZE, losses[0])
            else:
                # Auxiliary-loss models additionally receive the digit classes and
                # the alpha/beta settings.
                train_model_AM(model, optimizer, train_input, train_target, train_classes, NB_EPOCHS, MINI_BATCH_SIZE, losses[0], alpha, beta)
            time_diff = time() - start_time
            print_results(model, op, learning_rate, NB_EPOCHS, MINI_BATCH_SIZE, train_input, train_target, test_input, test_target, time_diff)
            # Shows the slot as it was BEFORE this run's values are appended.
            print(result_dct[k])
            train_error = model.compute_nb_errors(model, train_input, train_target, MINI_BATCH_SIZE) / train_input.size(0) * 100
            test_error = model.compute_nb_errors(model, test_input, test_target, MINI_BATCH_SIZE) / test_input.size(0) * 100
            result_dct[k]['train_error'].append(train_error)
            result_dct[k]['test_error'].append(test_error)
            result_dct[k]['time'].append(time_diff)
            k += 1
    return result_dct

In [42]:
from collections import defaultdict

# One slot per (model, learning rate) combination filled by calculate_results:
# 8 slots, each with an empty list per recorded metric.
result_dct = defaultdict(
    dict,
    {slot: {'train_error': [], 'test_error': [], 'time': []} for slot in range(8)},
)
# Repeat the full sweep 10 times; every pass appends one value per metric and slot.
for i in range(10):
    print(result_dct)
    result_dct = calculate_results(result_dct)

defaultdict(<class 'dict'>, {0: {'train_error': [], 'test_error': [], 'time': []}, 1: {'train_error': [], 'test_error': [], 'time': []}, 2: {'train_error': [], 'test_error': [], 'time': []}, 3: {'train_error': [], 'test_error': [], 'time': []}, 4: {'train_error': [], 'test_error': [], 'time': []}, 5: {'train_error': [], 'test_error': [], 'time': []}, 6: {'train_error': [], 'test_error': [], 'time': []}, 7: {'train_error': [], 'test_error': [], 'time': []}})
Loss at   0 : 1.8308327198028564  
model: WSModel, optimizer: SGD   , learning rate:    0.1, num epochs:  25, mini batch size: 100, train error:  5.40%, test error: 13.90%, time: 18.346
{'train_error': [], 'test_error': [], 'time': []}
Loss at   0 : 1.2222884893417358  
model: WSModel, optimizer: SGD   , learning rate:   0.01, num epochs:  25, mini batch size: 100, train error: 24.10%, test error: 25.60%, time: 23.529
{'train_error': [], 'test_error': [], 'time': []}
Loss at   0 : 0.6671006083488464  
model: WSModel1, optimizer: SGD

{'train_error': [46.7, 45.300000000000004], 'test_error': [48.8, 43.4], 'time': [29.614402770996094, 21.632165908813477]}
Loss at   0 : 4.580043792724609  
model: AuxModel1, optimizer: SGD   , learning rate:    0.1, num epochs:  25, mini batch size: 100, train error:  5.40%, test error:  9.50%, time: 22.168
{'train_error': [6.0, 10.5], 'test_error': [11.4, 12.9], 'time': [26.080273389816284, 22.549712896347046]}
Loss at   0 : 4.9327802658081055  
model: AuxModel1, optimizer: SGD   , learning rate:   0.01, num epochs:  25, mini batch size: 100, train error: 31.20%, test error: 31.40%, time: 21.816
{'train_error': [36.6, 29.2], 'test_error': [36.7, 28.499999999999996], 'time': [25.196635007858276, 21.920395374298096]}
defaultdict(<class 'dict'>, {0: {'train_error': [5.4, 5.6000000000000005, 4.5], 'test_error': [13.900000000000002, 14.099999999999998, 13.8], 'time': [18.34577441215515, 25.243510007858276, 21.689013242721558]}, 1: {'train_error': [24.099999999999998, 21.8, 22.3], 'test_err

Loss at   0 : 4.852020263671875  
model: AuxModel, optimizer: SGD   , learning rate:    0.1, num epochs:  25, mini batch size: 100, train error:  9.10%, test error:  9.60%, time: 27.508
{'train_error': [7.7, 6.2, 18.5, 6.4], 'test_error': [10.299999999999999, 10.0, 23.0, 11.4], 'time': [26.473223447799683, 22.363212823867798, 22.30935525894165, 27.179333686828613]}
Loss at   0 : 4.949337959289551  
model: AuxModel, optimizer: SGD   , learning rate:   0.01, num epochs:  25, mini batch size: 100, train error: 44.60%, test error: 48.80%, time: 22.599
{'train_error': [46.7, 45.300000000000004, 30.7, 42.3], 'test_error': [48.8, 43.4, 30.4, 41.3], 'time': [29.614402770996094, 21.632165908813477, 21.612218141555786, 26.003785371780396]}
Loss at   0 : 4.7352447509765625  
model: AuxModel1, optimizer: SGD   , learning rate:    0.1, num epochs:  25, mini batch size: 100, train error:  5.60%, test error: 10.90%, time: 21.811
{'train_error': [6.0, 10.5, 5.4, 6.0], 'test_error': [11.4, 12.9, 9.5, 8

defaultdict(<class 'dict'>, {0: {'train_error': [5.4, 5.6000000000000005, 4.5, 5.4, 8.1, 7.3], 'test_error': [13.900000000000002, 14.099999999999998, 13.8, 13.200000000000001, 16.1, 15.2], 'time': [18.34577441215515, 25.243510007858276, 21.689013242721558, 26.096319913864136, 24.410738706588745, 22.901767015457153]}, 1: {'train_error': [24.099999999999998, 21.8, 22.3, 24.0, 24.5, 24.099999999999998], 'test_error': [25.6, 23.799999999999997, 25.0, 24.8, 27.1, 24.6], 'time': [23.529094219207764, 25.57263159751892, 21.261157512664795, 24.251163244247437, 24.25906753540039, 23.300223112106323]}, 2: {'train_error': [1.0999999999999999, 0.1, 0.8999999999999999, 0.1, 0.3, 0.1], 'test_error': [14.399999999999999, 13.5, 14.899999999999999, 13.100000000000001, 13.8, 13.700000000000001], 'time': [24.00083613395691, 21.688016414642334, 21.557363986968994, 24.636134386062622, 23.523109912872314, 22.550456523895264]}, 3: {'train_error': [21.2, 21.6, 21.099999999999998, 21.6, 20.5, 19.900000000000002

defaultdict(<class 'dict'>, {0: {'train_error': [5.4, 5.6000000000000005, 4.5, 5.4, 8.1, 7.3, 5.800000000000001], 'test_error': [13.900000000000002, 14.099999999999998, 13.8, 13.200000000000001, 16.1, 15.2, 14.000000000000002], 'time': [18.34577441215515, 25.243510007858276, 21.689013242721558, 26.096319913864136, 24.410738706588745, 22.901767015457153, 25.148117065429688]}, 1: {'train_error': [24.099999999999998, 21.8, 22.3, 24.0, 24.5, 24.099999999999998, 21.099999999999998], 'test_error': [25.6, 23.799999999999997, 25.0, 24.8, 27.1, 24.6, 24.7], 'time': [23.529094219207764, 25.57263159751892, 21.261157512664795, 24.251163244247437, 24.25906753540039, 23.300223112106323, 24.05069923400879]}, 2: {'train_error': [1.0999999999999999, 0.1, 0.8999999999999999, 0.1, 0.3, 0.1, 1.2], 'test_error': [14.399999999999999, 13.5, 14.899999999999999, 13.100000000000001, 13.8, 13.700000000000001, 13.4], 'time': [24.00083613395691, 21.688016414642334, 21.557363986968994, 24.636134386062622, 23.523109

defaultdict(<class 'dict'>, {0: {'train_error': [5.4, 5.6000000000000005, 4.5, 5.4, 8.1, 7.3, 5.800000000000001, 5.6000000000000005], 'test_error': [13.900000000000002, 14.099999999999998, 13.8, 13.200000000000001, 16.1, 15.2, 14.000000000000002, 14.000000000000002], 'time': [18.34577441215515, 25.243510007858276, 21.689013242721558, 26.096319913864136, 24.410738706588745, 22.901767015457153, 25.148117065429688, 24.65308904647827]}, 1: {'train_error': [24.099999999999998, 21.8, 22.3, 24.0, 24.5, 24.099999999999998, 21.099999999999998, 24.3], 'test_error': [25.6, 23.799999999999997, 25.0, 24.8, 27.1, 24.6, 24.7, 25.6], 'time': [23.529094219207764, 25.57263159751892, 21.261157512664795, 24.251163244247437, 24.25906753540039, 23.300223112106323, 24.05069923400879, 25.51637291908264]}, 2: {'train_error': [1.0999999999999999, 0.1, 0.8999999999999999, 0.1, 0.3, 0.1, 1.2, 0.1], 'test_error': [14.399999999999999, 13.5, 14.899999999999999, 13.100000000000001, 13.8, 13.700000000000001, 13.4, 13.

defaultdict(<class 'dict'>, {0: {'train_error': [5.4, 5.6000000000000005, 4.5, 5.4, 8.1, 7.3, 5.800000000000001, 5.6000000000000005, 6.4], 'test_error': [13.900000000000002, 14.099999999999998, 13.8, 13.200000000000001, 16.1, 15.2, 14.000000000000002, 14.000000000000002, 14.099999999999998], 'time': [18.34577441215515, 25.243510007858276, 21.689013242721558, 26.096319913864136, 24.410738706588745, 22.901767015457153, 25.148117065429688, 24.65308904647827, 27.82798433303833]}, 1: {'train_error': [24.099999999999998, 21.8, 22.3, 24.0, 24.5, 24.099999999999998, 21.099999999999998, 24.3, 23.400000000000002], 'test_error': [25.6, 23.799999999999997, 25.0, 24.8, 27.1, 24.6, 24.7, 25.6, 24.8], 'time': [23.529094219207764, 25.57263159751892, 21.261157512664795, 24.251163244247437, 24.25906753540039, 23.300223112106323, 24.05069923400879, 25.51637291908264, 26.0882511138916]}, 2: {'train_error': [1.0999999999999999, 0.1, 0.8999999999999999, 0.1, 0.3, 0.1, 1.2, 0.1, 0.1], 'test_error': [14.39999

In [45]:
import statistics

# Mean and sample standard deviation of every recorded metric, per result slot.
# The number printed after "model" is the key of result_dct, i.e. the slot index
# assigned by calculate_results (one per model / learning-rate combination).
for slot, metrics in result_dct.items():
    train_errors = metrics['train_error']
    print('mean of train_error of model {} = {}'.format(slot, statistics.mean(train_errors)))
    print('stdev of train_error of model {} = {}'.format(slot, statistics.stdev(train_errors)))

    test_errors = metrics['test_error']
    print('mean of test_error of model {} = {}'.format(slot, statistics.mean(test_errors)))
    print('stdev of test_error of model {} = {}'.format(slot, statistics.stdev(test_errors)))

    durations = metrics['time']
    print('mean of time of model {} = {}'.format(slot, statistics.mean(durations)))
    print('stdev of time of model {} = {}'.format(slot, statistics.stdev(durations)))
    print()

mean of train_error of model 0 = 6.0600000000000005
stdev of train_error of model 0 = 1.0437113266288398
mean of test_error of model 0 = 14.18
stdev of test_error of model 0 = 0.8560892995995738
mean of time of model 0 = 24.28234467506409
stdev of time of model 0 = 2.718523210149737

mean of train_error of model 1 = 23.19
stdev of train_error of model 1 = 1.210555428076073
mean of test_error of model 1 = 25.09
stdev of test_error of model 1 = 0.8710785141293403
mean of time of model 1 = 24.37064745426178
stdev of time of model 1 = 1.4769039481585113

mean of train_error of model 2 = 0.41
stdev of train_error of model 2 = 0.46296148147911104
mean of test_error of model 2 = 13.75
stdev of test_error of model 2 = 0.5661762583821
mean of time of model 2 = 24.156477952003478
stdev of time of model 2 = 1.8505041649980032

mean of train_error of model 3 = 20.85
stdev of train_error of model 3 = 0.6916164640415479
mean of test_error of model 3 = 20.5
stdev of test_error of model 3 = 1.38644228

TypeError: can't pickle torch._C.Generator objects