<a href="https://colab.research.google.com/github/arthurbabey/deep_learning/blob/master/project_1/conv_net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
from torch import nn
import torch.optim as optim
from torch.nn import functional as F
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import sys
sys.path.append('/content/drive/My Drive')
import dlc_practical_prologue as prolog


In [0]:
def nb_errors(pred, truth):
    
    pred_class = pred.argmax(1)
    return (pred_class - truth != 0).sum().item()

def nb_errors_aux(pred, truth):
    
    pred_class = pred.view(-1, 2, 10).argmax(2).argmax(1)
    return (pred_class - truth != 0).sum().item()
        
    
def train_model(model, train_input, train_target, test_input, test_target,  epochs=500, batch_size=100, lr=0.1):

    torch.nn.init.xavier_uniform_(model.conv1.weight)
    torch.nn.init.xavier_uniform_(model.conv2.weight)
    
    optimizer = torch.optim.Adam(model.parameters())
    #scheduler = StepLR(optimizer, step_size=100, gamma=0.1)
    train_loss = []
    test_loss = []
    test_accuracy = []
    best_accuracy = 0
    best_epoch = 0
    
    for i in range(epochs):
        model.train()
      
        for b in range(0, train_input.size(0), batch_size):
            output = model(train_input.narrow(0, b, batch_size))
            criterion = torch.nn.CrossEntropyLoss()
            loss = criterion(output, train_target.narrow(0, b, batch_size))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #scheduler.step()

        output_train = model(train_input)
        model.eval()
        output_test = model(test_input)
        train_loss.append(criterion(output_train, train_target).item())
        test_loss.append(criterion(output_test, test_target).item())
        accuracy = 1 - nb_errors(output_test, test_target) / 1000
        
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_epoch = i+1
        test_accuracy.append(accuracy)
        
        if i%100 == 0:
            print('Epoch : ',i+1, '\t', 'test loss :', test_loss[-1], '\t', 'train loss', train_loss[-1])
        
    return train_loss, test_loss, test_accuracy, best_accuracy



def train_model_aux(model, train_input, train_classes, test_input, test_target, test_classes,\
                epochs=250, batch_size=100, lr=0.1):
    
    torch.nn.init.xavier_uniform_(model.conv1.weight)
    torch.nn.init.xavier_uniform_(model.conv2.weight)
        
    optimizer = torch.optim.Adam(model.parameters())
    train_loss = []
    test_loss = []
    test_accuracy = []
    best_accuracy = 0
    best_epoch = 0
    
    for i in range(epochs):
        for b in range(0, train_input.size(0), batch_size):
            output = model(train_input.narrow(0, b, batch_size))
            criterion = torch.nn.CrossEntropyLoss()
            loss = criterion(output, train_classes.narrow(0, b, batch_size))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        output_train = model(train_input)
        output_test = model(test_input)
        train_loss.append(criterion(output_train, train_classes).item())
        test_loss.append(criterion(output_test, test_classes).item())
        accuracy = 1 - nb_errors_aux(output_test, test_target) / 1000
        
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_epoch = i+1
        test_accuracy.append(accuracy)

        if i%100 == 0:
            print('Epoch : ',i+1, '\t', 'test loss :', test_loss[-1], '\t', 'train loss', train_loss[-1])
       
    return train_loss, test_loss, test_accuracy, best_accuracy

In [0]:
class ConvNet3(nn.Module):
    def __init__(self, nb_hidden):
        super(ConvNet3, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 2, out_channels = 4, kernel_size=2, stride = 1)
        self.conv2 = nn.Conv2d(4, 8, kernel_size=3, stride = 1, padding=2)
        self.conv3 = nn.Conv2d(8, 16, kernel_size = 3, stride = 1, padding=2)
        self.fc1 = nn.Linear(16*3*3, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = F.max_pool2d(F.relu(self.conv3(x)), 2)
        x = self.dropout1(x)
        x = x.view(-1, 16*3*3)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = F.relu(self.fc2(x))
        return x



In [0]:
class ConvNet2(nn.Module):
    def __init__(self, nb_hidden, i = 1):
        super(ConvNet2, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 2, out_channels = 4, kernel_size=2)
        self.conv2 = nn.Conv2d(4, 8, kernel_size=2, stride = 1)
        self.fc1 = nn.Linear(32, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 2)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), (2, 2)))
        x = self.dropout1(x)
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 32)
        x = self.dropout1(x)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

In [0]:
class ConvNetAux(nn.Module):
    def __init__(self, nb_hidden):
        super(ConvNetAux, self).__init__()
        self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 4, kernel_size=2)
        self.conv2 = nn.Conv2d(4, 8, kernel_size=2, stride = 1)
        self.fc1 = nn.Linear(32, nb_hidden)
        self.fc2 = nn.Linear(nb_hidden, 10)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), (2, 2)))
        x = self.dropout1(x)
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = x.view(-1, 32)
        x = self.dropout1(x)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

In [0]:
model1 = ConvNet2(3)
model2 = ConvNet2(6)
model3 = ConvNet2(10)
model4 = ConvNet2(15)
model5 = ConvNet2(20)
model6 = ConvNet2(50)
model7 = ConvNet2(80)
model8 = ConvNet2(120)
model9 = ConvNet2(200)
model10 = ConvNet2(1000)


model11 = ConvNet3(3)
model12 = ConvNet3(6)
model13= ConvNet3(10)
model14 = ConvNet3(20)
model15 = ConvNet3(35)
model16 = ConvNet3(70)
model17 = ConvNet3(100)
model18 = ConvNet3(150)
model19 = ConvNet3(250)
model20 = ConvNet3(1000)


aux1 = ConvNetAux(5)
aux2 = ConvNetAux(10)
aux3 = ConvNetAux(20)
aux4 = ConvNetAux(50)
aux5 = ConvNetAux(100)

In [0]:
models = [model1, model2, model3, model4, model5, model6, model7, model8, model9, model10, model11, model12, model13, model14,
          model15, model16, model17, model18, model19, model20]

auxs = [aux1, aux2, aux3, aux4, aux5]

In [39]:
if torch.cuda.is_available():
    device = torch.device("cuda:0") 
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

for model in models:
    model.to(device)

for aux in auxs: 
    aux.to(device)

Running on the GPU


In [14]:
import time

start = time.time()
epochs = 200
accuracies = torch.empty(20, 4, dtype=torch.float)

for i in range(4):
    train_input, train_target, train_classes, test_input, test_target, test_classes = prolog.generate_pair_sets(1000)
    train_input = train_input.cuda()
    train_target = train_target.cuda()
    test_input = test_input.cuda()
    test_target = test_target.cuda()

    for j in range(20):
        _, _, _, best_accuracy = train_model(models[j], train_input, train_target, test_input,\
                                             test_target, epochs=epochs, lr = 0.005)
        print('Model', j+1 , best_accuracy)
        accuracies[j][i] = best_accuracy


minute = (time.time()-start) / 60
print('It took', minute, 'minutes.')



Epoch :  1 	 test loss : 0.6918238401412964 	 train loss 0.6975423693656921
Epoch :  101 	 test loss : 0.6659956574440002 	 train loss 0.6723462343215942
Model 1 0.552
Epoch :  1 	 test loss : 0.6877380013465881 	 train loss 0.687165379524231
Epoch :  101 	 test loss : 0.6877829432487488 	 train loss 0.6861211061477661
Model 2 0.702
Epoch :  1 	 test loss : 0.6921951770782471 	 train loss 0.7684497833251953
Epoch :  101 	 test loss : 0.647911012172699 	 train loss 0.679916262626648
Model 3 0.726
Epoch :  1 	 test loss : 0.6851949095726013 	 train loss 0.6833269000053406
Epoch :  101 	 test loss : 0.5831524729728699 	 train loss 0.6495051383972168
Model 4 0.753
Epoch :  1 	 test loss : 0.6997272968292236 	 train loss 0.7665861248970032
Epoch :  101 	 test loss : 0.6234093308448792 	 train loss 0.6478978991508484
Model 5 0.736
Epoch :  1 	 test loss : 0.6853013634681702 	 train loss 0.6909220218658447
Epoch :  101 	 test loss : 0.579643189907074 	 train loss 0.6054800152778625
Model 6 0.

In [18]:
accuracies.mean(1)

tensor([0.6335, 0.7170, 0.7292, 0.7345, 0.7352, 0.7283, 0.7240, 0.7260, 0.7345,
        0.7130, 0.5505, 0.5975, 0.7283, 0.7358, 0.5505, 0.5505, 0.5505, 0.5505,
        0.5505, 0.5505])

In [19]:
accuracies.std(1)

tensor([0.0985, 0.0385, 0.0268, 0.0197, 0.0102, 0.0122, 0.0180, 0.0041, 0.0231,
        0.0231, 0.0121, 0.0958, 0.0266, 0.0271, 0.0121, 0.0121, 0.0121, 0.0121,
        0.0121, 0.0121])

In [31]:
len(auxs)

5

In [40]:
epochs = 400
accuracies_aux = torch.empty(5, 4, dtype=torch.float)

start = time.time()
for i in range(4):
     train_input, train_target, train_classes, test_input, test_target, test_classes = prolog.generate_pair_sets(1000)
     train_input_aux = train_input.view(-1, 14, 14).unsqueeze(1).cuda()
     test_input_aux = test_input.view(-1, 14, 14).unsqueeze(1).cuda()
     train_classes_aux = train_classes.view(2000).cuda()
     test_classes_aux = test_classes.view(2000).cuda()
     test_target = test_target.cuda()
     for j in range(5):
        _, _, _, best_accuracy = train_model_aux(auxs[j], train_input_aux, train_classes_aux, test_input_aux, test_target, test_classes_aux, \
                                                 epochs = epochs, lr = 0.01)
        print('Model', j+1 , best_accuracy)
        accuracies_aux[j][i] = best_accuracy


minute = (time.time()-start) / 60
print('It took', minute, 'minutes.')

Epoch :  1 	 test loss : 17.545307159423828 	 train loss 17.64027976989746
Epoch :  101 	 test loss : 2.3030121326446533 	 train loss 2.298919200897217
Epoch :  201 	 test loss : 2.3031198978424072 	 train loss 2.2989156246185303
Epoch :  301 	 test loss : 2.303117513656616 	 train loss 2.298915386199951
Model 1 0.5640000000000001
Epoch :  1 	 test loss : 6.267399311065674 	 train loss 6.823856830596924
Epoch :  101 	 test loss : 2.160560369491577 	 train loss 2.1772992610931396
Epoch :  201 	 test loss : 2.1108527183532715 	 train loss 2.120410442352295
Epoch :  301 	 test loss : 2.0870611667633057 	 train loss 2.0540316104888916
Model 2 0.654
Epoch :  1 	 test loss : 8.222110748291016 	 train loss 7.6644110679626465
Epoch :  101 	 test loss : 2.144181251525879 	 train loss 2.140069007873535
Epoch :  201 	 test loss : 1.7356529235839844 	 train loss 1.7112184762954712
Epoch :  301 	 test loss : 1.4589918851852417 	 train loss 1.4491063356399536
Model 3 0.738
Epoch :  1 	 test loss : 7