## Cleaned up basic LeNet exploration.


##### *Starting points : Importing, getting data, normalizing*
`Stuff you need, but is hidden here.`

In [1]:
#Starting points : import everything, use cuda if available
import torch
import math
from torch.nn import functional as F
import dlc_practical_prologue as prologue
import matplotlib.pyplot as plt
%matplotlib inline
from torch import optim
from torch import Tensor
from torch import nn

if torch.cuda.is_available():
    device = torch.device('cuda')
    print("Using : {}".format(device))
else:
    device = torch.device('cpu')
    print("Using : {}".format(device))


Using : cuda


In [3]:
def norm_(train_input,test_input):
    """Function to normalize the input --> done IN PLACE!"""
    mu, std = train_input.mean(), train_input.std()
    train_inputOut = train_input.sub_(mu).div_(std)
    test_inputOut = test_input.sub_(mu).div_(std)
    return train_inputOut, test_inputOut

In [4]:
#Generating pairs of 14x14 and sending to device. Here default
N=1000
(train_input,train_target,train_classes, \
 test_input,test_target,test_classes) = prologue.generate_pair_sets(N)
train_input = train_input.to(device)
test_input = test_input.to(device)
train_target = train_target.to(device)
test_target = test_target.to(device)
train_classes, test_classes = train_classes.to(device), test_classes.to(device)
train_input,test_input = norm_(train_input,test_input);


#### Model : Shared-weight ConvNet. Same as Plain ConvNet but just separating the inputs.

`Conv layer 1 : takes 1x14x14 --> 32x12x12 --> Maxpool --> 32x6x6` 

`Conv Layer 2 : Takes 32x6x6 --> 64x4x4 --> Maxpool 64x2x2`

`FC 1 : View(-1,64*2*2) --> 264 (random number but works)`

`FC 2 : 264-->100 --> FC3 --> 2`

`Using dropout, batchnorm on all hidden layers (FC1, FC2)`

`Softmax as last activation, ReLU for all the others`

In [5]:
##Conv layers, based on the filter size and layer sizes tested in explorationLeNetDual
# must take 1x14x14 (so the same layers is used on both images)
# Separated the modules because I couldn't figure out how to make a single net work...
# will maybe merge into a single net later.
class convlayer(nn.Module):
    def __init__(self):
        super(convlayer, self).__init__()
        #self.conv1 : takes 1x14x14, gives 32x12x12, then maxpool(k=2) -> 32x6x6
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3) 
        #self.conv2 : takes 32x6x6, gives 64x4x4, then maxpool(k=2) -> outputs 64x2x2 to the fc layers
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2,stride=2))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2,stride=2))
        return x
#fc layers, adding a supp layer that has output dim 10 (instead of 2)
#in order to maybe calculate an aux loss on this output to have classification?
# REMEMBER TO ADD AN ACTIVATION WHEN CALLING SHARED_FCLAYER IN YOUR NETWORK!!
# ex : F.relu(self.shared_fclayer(tmp1) or F.softmax(...,dim=1)
class shared_fclayer(nn.Module):
    def __init__(self):
        super(shared_fclayer,self).__init__()
        #gets in 64x2x2, convers to 1x250
        self.fc1 = nn.Linear(2*2*64,264)
        self.bn1 = nn.BatchNorm1d(264)
        #second layer : 250 to 100
        self.fc2 = nn.Linear(264,100)  
        self.bn2 = nn.BatchNorm1d(100)
        #outputs dim 10 so we can test the aux loss for classifying numbers
        #use softmax on fc3?
        self.fc3 = nn.Linear(100,10)
        self.dropout = nn.Dropout(0.25)
    def forward(self,x):
        x = self.dropout(self.bn1(F.relu(self.fc1(x.view(-1,2*2*64)))))
        x = self.dropout(self.bn2(F.relu(self.fc2(x))))
        x = self.fc3(x)
        return x
#extra final layer, not shared
class final_predictionlayer(nn.Module):
    #this final layer should take 2*10 (one for each image) and output 2 
    def __init__(self):
        super(final_predictionlayer,self).__init__()
        self.final = nn.Linear(20,2)
    def forward(self,x):
        x = F.softmax(self.final(x),dim=1)
        return x

#weight-sharing Net
#returns tmp1, tmp2 in order to calculate and optimize with auxLoss
#Those will be compared to the train_classes.narrow(1,0,1) and .narrow(1,1,1)

class AuxLossWS_Net(nn.Module):
    def __init__(self):
        super(AuxLossWS_Net,self).__init__()
        self.convlayer = convlayer()
        self.fclayer = shared_fclayer()
        self.final = final_predictionlayer()
    def forward(self,x):
        tmp1 = x.narrow(1,0,1) #viewing only one image
        tmp2 = x.narrow(1,1,1) #viewing only one image
        #applying the conv layers
        tmp1 = self.convlayer.forward(tmp1) 
        tmp2 = self.convlayer.forward(tmp2)
        #applying the fc layers
        tmp1 = F.softmax(self.fclayer(tmp1),dim=1)
        tmp2 = F.softmax(self.fclayer(tmp2),dim=1)
        #viewing and final prediction
        output = torch.cat((tmp1,tmp2),1)
        output.view(-1,20)
        x = self.final(output)
        return x, tmp1, tmp2     

#### Train_model, compute_nb_errors, run_net

##### *Adapted to also train for auxLoss. Gamma determines the "weight" of the primary loss. Works best with gamma = 0.67, maybe try other values.*
*run_net(...) does everything, use only this function, it will call the others. (See params)*

*run_net(...) **RETURNS** the test error as a float, useful for average over multiple runs* 

In [13]:
def train_model_aux(model, train_input, train_target, train_classes, nb_epochs=50, 
                eta=9e-2, mini_batch_size=25, 
                    alpha=.75, gamma=1,
                    printTrain = False, graphLoss = False):
    """Trains the model, using CrossEntropyLoss and SGD 
    Model : Architecture to be tested, pytorch.nn.Module
    Train_input : Input tensors Nx2x14x14, N = 1000
    Train_target : Target labels, N, classes = 0 or 1
    Nb_epochs : nb of epochs to train over
    eta : Learning rate
    mini_batch_size : Size of minibatch to be processed"""
    
    #Squeeze the classes labels (hotlabeling) for the auxLoss
    trainlabel_1 = (train_classes.narrow(1,0,1)).squeeze()
    trainlabel_2 = (train_classes.narrow(1,1,1)).squeeze()

    model.train(True)
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    criterion.to(device)
    optimizer = optim.SGD(model.parameters(),lr=eta)
    losses = []
    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):    
            
            out_compare, out_1, out_2 = model(train_input.narrow(0, b, mini_batch_size))
            #Main Loss
            loss_compare = criterion(out_compare, train_target.narrow(0, b, mini_batch_size))
            #AuxLoss
            loss_1 = criterion(out_1, trainlabel_1.narrow(0, b, mini_batch_size))
            loss_2 = criterion(out_2, trainlabel_2.narrow(0, b, mini_batch_size))
            #Weighted sum. Used to be Alpha*Loss1 + Beta*Loss2 + Gamma* Loss compare
            #Didn't work well, try again with other alpha/betas < 1.
            loss_sum = alpha*loss_1 + alpha*loss_2 + gamma*loss_compare
            
            losses.append(loss_sum)
            model.zero_grad()
            loss_sum.backward()
            optimizer.step()
        if printTrain : 
            print("Epoch : {} :: Train error : {}/{}, {:0f}%".format(e,
            compute_nb_errors(model,train_input,train_target,mini_batch_size),train_target.size(0),
            (100*compute_nb_errors(model,train_input,train_target,mini_batch_size)/train_target.size(0))))
    if graphLoss : 
        plt.plot(losses)
        plt.ylabel('loss')
#--------------------------------------------------------------------------------------------------------#
def compute_nb_errors(model,data_input,data_target,mini_batch_size):
    """std from the séries"""
    nb_errors = 0;
    model.to(device)
    data_input, data_target = data_input.to(device),data_target.to(device)
    for b in range(0,data_input.size(0),mini_batch_size):
        output, _, _ = model(data_input.narrow(0,b,mini_batch_size))
        _, predicted_classes = torch.max(output, 1)
        for k in range(mini_batch_size):
            if data_target[b+k]!=predicted_classes[k]:
                nb_errors += 1
    
    return nb_errors
#--------------------------------------------------------------------------------------------------------#
def run_net_aux(model,train_input,train_target, train_classes,
            test_input, test_target, test_classes,
            nb_epochs = 50,eta=9e-2,mini_batch_size=25,
            alpha = .75, gamma=1,
            printTrain = False,graphLoss = False):
    """"""
    model.to(device)
    print("Model tested : {}".format(str(model)[:str(model).find('(')]))
    print("""Using {} epochs, lr = {:.04f},Mini batch size = {}""".format(nb_epochs,
                                                                          eta,mini_batch_size))
    train_model_aux(model, train_input, train_target, train_classes,
                nb_epochs, eta, mini_batch_size, 
                    alpha, gamma, printTrain,graphLoss)
    model.train(False)
    train_error = compute_nb_errors(model, train_input, train_target,mini_batch_size) / train_input.size(0) * 100
    test_error = compute_nb_errors(model, test_input, test_target,mini_batch_size) / test_input.size(0) * 100
    print('train_error {:.02f}% test_error {:.02f}% \n'.format(
                train_error,
                test_error
            )
        )
    return float(test_error)

In [7]:
err = 0.0;
nb = 5
gamma = 0.67
#Testing with softmax on fully_connected (on linear(100,10)) on TMP1,2
for n in range(nb):
    err += run_net_aux(AuxLossWS_Net(), train_input, train_target, train_classes,
                test_input, test_target, test_classes,
                       alpha = 1, gamma = gamma)
print("Softmax Net : Softmax only on tmp1, tmp2 NO RELU AT ALL ON FC3")
print("Average error for gamma = {}, over {} runs :: {:02f}".format(gamma,nb,err/nb))


Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 2.30% test_error 4.50% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.60% test_error 4.60% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.50% test_error 4.30% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.50% test_error 4.90% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.60% test_error 3.80% 

Softmax Net : Softmax only on tmp1, tmp2 NO RELU AT ALL ON FC3
Average error for gamma = 0.67, over 5 runs :: 4.420000


In [9]:
err = 0.0;
nb = 5

#Testing with softmax on fully_connected (on linear(100,10)) on TMP1,2
for n in range(nb):
    err += run_net_aux(AuxLossWS_Net(), train_input, train_target, train_classes,
                test_input, test_target, test_classes,
                       alpha=1,gamma = 1)
print("AuxLossWS_Net")
print("Average error for gamma = {}, over {} runs :: {:02f}".format(gamma,nb,err/nb))


Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.10% test_error 3.70% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.50% test_error 3.60% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.50% test_error 4.20% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 0.80% test_error 3.40% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.40% test_error 4.50% 

Softmax Net : Softmax only on tmp1, tmp2 NO RELU AT ALL ON FC3
Average error for gamma = 0.8, over 5 runs :: 3.880000


In [None]:
nb = 4
print("AuxLossWS_Net")
arrayValues = []
#Testing with softmax on fully_connected (on linear(100,10)) on TMP1,2
for a in [0.25,0.5,0.75]:
    for c in [0.25,0.5,0.75,1]:
        err = 0.0;
        for n in range(nb):
            err += run_net_aux(AuxLossWS_Net(), train_input, train_target, train_classes,
                            test_input, test_target, test_classes,
                                   alpha=a,gamma = c)
        print("Average error for alpha = {}, gamma = {}, over {} runs :: {:02f}".format(a,c,nb,err/nb))
        err = err/nb;
        arrayValues.append([err,a,c])
print(arrayValues)

AuxLossWS_Net
Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 5.60% test_error 8.30% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 8.00% test_error 7.50% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 5.30% test_error 5.70% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 3.00% test_error 5.30% 

Average error for alpha = 0.25, gamma = 0.25, over 4 runs :: 6.700000
Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 2.40% test_error 5.30% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 2.30% test_error 4.80% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 2.80% test_error 6.50% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 2.40% test_er

In [16]:
print(arrayValues)

[[6.7, 0.25, 0.25], [5.625, 0.25, 0.5], [5.450000000000001, 0.25, 0.75], [6.625, 0.25, 1], [6.7749999999999995, 0.5, 0.25], [5.0249999999999995, 0.5, 0.5], [4.3, 0.5, 0.75], [3.85, 0.5, 1], [7.125, 0.75, 0.25], [4.9750000000000005, 0.75, 0.5], [4.7250000000000005, 0.75, 0.75], [3.775, 0.75, 1]]


In [17]:
for a in [1]:
    for c in [0.25,0.5,0.75,1]:
        err = 0.0;
        for n in range(nb):
            err += run_net_aux(AuxLossWS_Net(), train_input, train_target, train_classes,
                            test_input, test_target, test_classes,
                                   alpha=a,gamma = c)
        print("Average error for alpha = {}, gamma = {}, over {} runs :: {:02f}".format(a,c,nb,err/nb))
        err = err/nb;
        arrayValues.append([err,a,c])
print(arrayValues)

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 4.30% test_error 6.50% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 4.50% test_error 5.60% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 4.00% test_error 6.00% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 3.00% test_error 5.20% 

Average error for alpha = 1, gamma = 0.25, over 4 runs :: 5.825000
Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 2.20% test_error 4.80% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 2.30% test_error 4.80% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 2.20% test_error 5.00% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 2.40% test_error 5.00% 

Avera

In [28]:
#Run 10 times : Get accuracy. Then get mean and Std, for gamma = 1, and a in 0.5, 0.67, 0.75, 1 (rows 0 to 3)
x = torch.zeros(4,10,dtype =torch.float64,device='cuda')
i=0;
for a in [0.5,0.67,0.75,1]:
    print("ALPHA = {}".format(a))
    for j in range(10):
        x[i,j]=run_net_aux(AuxLossWS_Net(), train_input, train_target, train_classes,
                            test_input, test_target, test_classes,
                                   alpha=a,gamma = 1)
    i+=1


ALPHA = 0.5
Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.60% test_error 4.70% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 0.80% test_error 3.40% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 0.70% test_error 4.30% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 0.70% test_error 4.20% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 0.90% test_error 4.40% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 0.80% test_error 4.00% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.60% test_error 3.80% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mini batch size = 25
train_error 1.50% test_error 5.10% 

Model tested : AuxLossWS_Net
Using 50 epochs, lr = 0.0900,Mi

In [37]:
print(x)

mu = x.mean(1)
std = x.std(1)
alpha = torch.Tensor([0.5,0.67,0.75,1])

tensor([[4.7000, 3.4000, 4.3000, 4.2000, 4.4000, 4.0000, 3.8000, 5.1000, 3.8000,
         4.1000],
        [4.3000, 3.9000, 4.0000, 4.2000, 3.9000, 3.9000, 5.0000, 3.5000, 3.4000,
         4.3000],
        [4.1000, 3.5000, 3.6000, 3.7000, 3.5000, 4.3000, 3.4000, 3.7000, 3.5000,
         3.1000],
        [3.8000, 3.1000, 3.4000, 4.4000, 3.6000, 4.2000, 3.9000, 3.6000, 3.2000,
         3.5000]], device='cuda:0', dtype=torch.float64)


In [39]:
print("Mean accuracy : ",mu, "\n Accuracy Std Dev : ",std, "\n for Alpha =",alpha)

Mean accuracy :  tensor([4.1800, 4.0400, 3.6400, 3.6700], device='cuda:0', dtype=torch.float64) 
 Accuracy Std Dev :  tensor([0.4849, 0.4526, 0.3438, 0.4138], device='cuda:0', dtype=torch.float64) 
 for Alpha = tensor([0.5000, 0.6700, 0.7500, 1.0000])
