# Imports

In [0]:
import numpy as np
import pickle as pkl
import time
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
from google.colab import files
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Load Sample Data

In [6]:
transformer = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) #Here we take in a PIL Image 
                                                        #from CIFAR and transform the data to CIFAR
roots= '/content/drive/My\ Drive/COMP_551/mini_3/data' #possible root

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transformer)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2) #Train Set

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transformer)  #Test Set
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


# MLP Implementation

In [0]:
class MLP():
    '''MLP Implementation'''
    
    
    def __init__(self, layer_dims, n_epochs, batch_size, alpha, 
                 hidden_layer_activation='relu', output_activation='softmax', 
                 dropout_ratio=0):
        '''MLP Construction
    
        Input Arguments: 

        layer_dims ([int]): number of nodes in each layer including input/output layers
        hidden_layer_activation (String): ('sigmoid', 'relu', 'leaky_relu') corresponding to activatoin in all hidden layers
        output_activation (String): ('sigmoid', 'softmax')
        dropout_prob (float): number between 0 (includsive) and 1 (exclusive) representing probability of dropout
        '''
            
        self.layer_dims = []
        for dim in layer_dims:
            dim_int = int(dim)
            
            if dim_int <= 0:
                raise ValueError("Layer dimensions must be greater than zero")
            else:
                self.layer_dims.append(dim_int)
                
        self.n_layers = len(self.layer_dims)
        
        self.batch_size = int(batch_size)
        self.n_epochs = int(n_epochs)
        self.alpha = float(alpha)
        
        if self.batch_size <= 0:
            raise ValueError("batch_size must be greater than 0")
        if self.n_epochs <= 0:
            raise ValueError("n_epochs must be greater than zero")
        if self.alpha <= 0:
            raise ValueError("learning rate must be greater than zero")
                
            
        self.hidden_layer_activation = hidden_layer_activation
        self.output_activation = output_activation
        if self.hidden_layer_activation not in ['sigmoid', 'relu', 'leaky_relu']:
            raise ValueError("Invalid hidden layer activation. Must be 'sigmoid', 'relu', or 'leaky_relu'")
        if self.output_activation not in ['sigmoid', 'softmax']:
            raise ValueError("Invalid output layer activation. Must be 'sigmoid' or 'softmax'")
            
        self.dropout_ratio = float(dropout_ratio)
        if self.dropout_ratio < 0 or self.dropout_ratio >= 1:
            raise ValueError("dropout_ratio must be between 0 (inclusive) and 1 (exclusive)")
         
        self.layers = []
        self.n_weights = 0
        for i, dim in enumerate(self.layer_dims):
            if i == 0: #input layer
                layer = MLP_Layer(dim, dim_prev=None, activation=None, dropout_ratio=0, 
                                  prev_layer=None, next_layer=None)
            elif i == self.n_layers - 1: #output layer
                layer = MLP_Layer(dim, dim_prev=last_layer_added.dim, activation = self.output_activation, 
                                  dropout_ratio = 0, prev_layer = last_layer_added, next_layer = None)
                last_layer_added.next_layer = layer
                self.n_weights += layer.dim * (layer.dim_prev + 1)
            else: 
                layer = MLP_Layer(dim, dim_prev=last_layer_added.dim, activation = self.hidden_layer_activation, 
                                  dropout_ratio = self.dropout_ratio, prev_layer = last_layer_added, next_layer = None)
                last_layer_added.next_layer = layer
                self.n_weights += layer.dim * (layer.dim_prev + 1)
                
                
            last_layer_added = layer
            self.layers.append(layer)
        
        
    def fit(self, X, y):
        assert X.shape[0] == y.shape[0], "Input and labels must have matching first dimension."
        X_batches, y_batches = self.batch(X, y)
        cost_list = []

        y_z, y_hat = self.forward(X, y, test=True)
        J = self.cost(y, y_z)
        print(f"EPOCH {0} | COST {J}")
        cost_list.append(J)

        for epoch in range(self.n_epochs):
            for X_batch, y_batch in zip(X_batches, y_batches):
                
            
                
                y_z, y_hat = self.forward(X_batch, y_batch) # y_z is before applying sigmoid or softmax
                
                J = self.cost(y_batch, y_z)
                
                self.output_grad(y_batch, y_hat)
                
                for layer in self.layers[-2:0:-1]: #all hidden layers
                    layer.backward_pass()
                    
                for layer in self.layers[-1:0:-1]: #output and all hidden layers
                    layer.update(self.alpha)
                
            
            
            if (epoch+1) % 5 == 0:  
                y_z, y_hat = self.forward(X, y, test=True)
                J = self.cost(y, y_z)
                print(f"EPOCH {epoch + 1} | COST {J}")
                cost_list.append(J)
            
        return cost_list

    
    def cost(self, y, y_z):
                
        if self.output_activation == 'softmax':    
            return self.softmax_cost(y, y_z)
        elif self.output_activation == 'sigmoid':
            return self.sigmoid_cost(y, y_z)
    
        
    
    def fit_document_acc(self, X, y, X_val, y_val):
        assert X.shape[0] == y.shape[0], "Input and labels must have matching first dimension."
        assert X_val.shape[0] == y_val.shape[0]
        X_batches, y_batches = self.batch(X, y)
        acc_list_train = []
        acc_list_val = []

        acc_train = self.predict_return_acc(X, y)
        acc_val = self.predict_return_acc(X_val, y_val)
        print(f"EPOCH {0}| TRAIN ACCURACY: {acc_train} | VAL ACCURACY: {acc_val}")
        acc_list_train.append(acc_train)
        acc_list_val.append(acc_val)


        for epoch in range(self.n_epochs):
            for X_batch, y_batch in zip(X_batches, y_batches):
                
               
                y_z, y_hat = self.forward(X_batch, y_batch) # y_z is before applying sigmoid or softmax
                
                J = self.cost(y_batch, y_z)
                
                self.output_grad(y_batch, y_hat)
                
                for layer in self.layers[-2:0:-1]: #all hidden layers
                    layer.backward_pass()
                    
                for layer in self.layers[-1:0:-1]: #output and all hidden layers
                    layer.update(self.alpha)
                    
                
            
            if (epoch + 1) % 5 == 0:    
                acc_train = self.predict_return_acc(X, y)
                acc_val = self.predict_return_acc(X_val, y_val)
                print(f"EPOCH {epoch + 1} | TRAIN ACCURACY: {acc_train} | VAL ACCURACY: {acc_val}")
                acc_list_train.append(acc_train)
                acc_list_val.append(acc_val)
            
        
        return acc_list_train, acc_list_val
        
            
        
                    
                
    def forward(self, X, y, test=False):
        self.layers[0].A = X.T
                
        for layer in self.layers[1::]:
            layer.forward_pass(test = test)

        return self.layers[-1].Z, self.layers[-1].A
        


    def batch(self, X, y):
        X, y = self.shuffle(X, y)
        X_batches = []
        y_batches = []

        m = X.shape[0]

        for i in range(0, m, self.batch_size):
            if i + self.batch_size <= m:
                X_batch = X[i:i + self.batch_size, :]
                y_batch = y[i:i + self.batch_size, :]

            else:
                X_batch = X[i:, :]
                y_batch = y[i:, :]
                
            X_batches.append(X_batch)
            y_batches.append(y_batch)

        return X_batches, y_batches


    def shuffle(self, X, y):
        p = np.random.permutation(X.shape[0])
        X = X[p, :]
        y = y[p, :]
        return X, y

    def softmax_cost(self, y, y_z):
        '''
        y: N x C
        y_z: C x N
        '''
        N = y.shape[0]
        return (1 / N) * (- np.sum(np.diagonal(np.dot(y, y_z))) - self.logsumexp(y_z))
    
    def sigmoid_cost(self, y, y_z):
        '''
        y: N x 1
        y_z: 1 x N
        '''
        return np.mean( y.T * np.log1p(np.exp(-y_z)) + (1-y.T) * np.log1p(np.exp(y_z)) )
    
    def logsumexp(self, Z):
        Zmax = np.max(Z, 0)[None, :] # 1 x n_examples
        return np.sum(Zmax + np.log(np.sum(np.exp(Z  - Zmax), axis=0))) #float

    def output_grad(self, y, y_hat):
        '''
        y: N X C (one hot encoded labels)
        y_hat: C x N (after applying softmax/sigmoid)
        '''
        N = y.shape[0]
        self.layers[-1].dZ = (1 / N) * (y_hat - y.T) # dim x n_examples
        #for both softmax and sigmoid!
        self.layers[-1].dW = np.dot(self.layers[-1].prev_layer.A, self.layers[-1].dZ.T) # dim_prev x dim
        self.layers[-1].db = np.sum(self.layers[-1].dZ, axis=1).reshape(-1, 1) # dim x 1

    def predict(self, X, y):
        _, y_hat = self.forward(X, y, test=True)
        y_pred = np.argmax(y_hat, axis=0).reshape(-1, 1)
        y_true = np.argmax(y, axis=1).reshape(-1, 1)
        return y_pred, y_true
    
    def predict_return_acc(self, X, y):
        y_pred, y_true = self.predict(X, y)
        return np.mean(y_pred == y_true)


 
        
        
        
        
        
        


class MLP_Layer():
    '''Layer of an MLP'''
    
    def __init__(self, dim, dim_prev=None, activation=None, dropout_ratio=0, next_layer=None, prev_layer=None, zeta=0.1):
        '''Constructing layer of MLP
    
        Input Arguments:

        dim (int): number of nodes in this layer
        dim_prev (int or None): number of nodes in the previous layer. None if input layer.
        activation (String or None): 'sigmoid', 'softmax', 'relu', or 'leaky_relu' indicating non-linearity to apply. None if input layer
        dropout_ratio (float): number between 0 (includsive) and 1 (exclusive) representing probability of dropout
        next_layer (MLP_Layer or None): the next layer in the MLP; if this is the output layer, set to None 
        prev_layer (MLP_Layer or None): the previous layer in the MLP; if this is the input layer, set to None 
        zeta(float): parameter for leaky relu activation
        '''
    
            
        self.dim = int(dim)
        if dim_prev != None:
            self.dim_prev = int(dim_prev)
            if self.dim_prev <= 0:
                raise ValueError("Layer dimensions must be positive integers")

        if self.dim <= 0:
            raise ValueError("Dimensions of layer are non-positive integers")
            
        self.dropout_ratio = float(dropout_ratio)

        if self.dropout_ratio < 0 or self.dropout_ratio >= 1:
            raise ValueError("dropout_ratio must be between 0 (inclusive) and 1 (exclusive)")
            
            
        self.zeta = float(zeta)
        if self.zeta <= 0:
            raise ValueError("zeta must be greater than zero")

        self.activation = activation
        if self.activation != None:
            if self.activation not in ['sigmoid', 'relu', 'leaky_relu', 'softmax']:
                raise ValueError("Invalid layer activation. Must be 'sigmoid', 'relu', 'leaky_relu', or 'softmax'")
        

        if next_layer != None:
            if not isintance(next_layer, MLP_Layer):
                raise TypeError("next_layer must be None or of type MLP_Layer")

        if prev_layer != None:
            if not isinstance(prev_layer, MLP_Layer):
                raise TypeError("prev_layer must be None or of type MLP_Layer")

        self.next_layer = next_layer
        self.prev_layer = prev_layer
        
        if dim_prev != None:
            self.W = np.random.randn(dim_prev, dim) * 0.1
            self.b = np.zeros((dim, 1))
            self.dW = np.zeros(self.W.shape)
            self.db = np.zeros(self.b.shape)
        else:
            self.W = None
            self.b = None
            self.dW = None
            self.db = None
        
        self.Z = None
        self.A = None

        
        self.dZ = None
        self.dA = None
             
       
            
    def forward_pass(self, test=False):
        self.Z = np.dot(self.W.T, self.prev_layer.A) + self.b # dim x n_examples
        self.A = self.activate()
        if test: #testing -- scale with zero dropout
            self.A = self.A * (1 - self.dropout_ratio)
        else: #training -- apply dropout
            dropout_mask = np.random.choice([0, 1], size = self.Z.shape, p=[self.dropout_ratio, 1 - self.dropout_ratio])
            self.A = self.A * dropout_mask
    
    def activate(self):
        if self.activation == 'sigmoid':
            return self.sigmoid()
        elif self.activation == 'relu':
            return self.relu()
        elif self.activation == 'leaky_relu':
            return self.leaky_relu()
        elif self.activation == 'softmax':
            return self.softmax()
            
    def sigmoid(self):
        return 1 / (1 + np.exp(-self.Z))
    
    def relu(self):
        return np.maximum(self.Z, np.zeros(self.Z.shape))
    
    def leaky_relu(self):
        return np.maximum(self.Z, np.zeros(self.Z.shape)) + self.zeta*np.minimum(self.Z, np.zeros(self.Z.shape))
    
    def softmax(self):
        return np.exp(self.Z) / (np.sum(np.exp(self.Z), axis=0).reshape(1, -1))
    
    
    def backward_pass(self):
        '''Calculates gradient for hidden layer'''
        self.dA = np.dot(self.next_layer.W, self.next_layer.dZ) # dim x n_examples

        if self.activation == 'sigmoid':
            self.dZ = self.dA * (self.A * (1 - self.A)) # dim x n_examples
        elif self.activation == 'relu':
            self.dZ = self.dA * (self.A > 0).astype(int)
        elif self.activation == 'leaky_relu':
            self.dZ = self.dA * (self.A > 0).astype(int)
            np.where(self.dZ, self.dZ, self.zeta) # if 0, set to zeta
        
        self.dW = np.dot(self.prev_layer.A, self.dZ.T) # dim_prev x dim
        self.db = np.sum(self.dZ, axis=1, keepdims=True) # dim x 1
        
        
    def update(self, alpha):
        self.W = self.W - alpha * self.dW
        self.b = self.b - alpha * self.b

## CIFAR 10

In [0]:
trainloader_complete = torch.utils.data.DataLoader(trainset, batch_size=50000,
                                          shuffle=True, num_workers=2) #Entire Train Set
testloader_complete = torch.utils.data.DataLoader(testset, batch_size=10000,
                                          shuffle=True, num_workers=2) #Entire Test Set

X_train_tensor, y_train_tensor = iter(trainloader_complete).next()
X_test_tensor, y_test_tensor = iter(testloader_complete).next()

In [0]:
one_hot = OneHotEncoder(sparse=False)
y_train = one_hot.fit_transform(y_train_tensor.numpy().reshape(-1, 1))
y_test = one_hot.fit_transform(y_test_tensor.numpy().reshape(-1, 1))
X_train = X_train_tensor.numpy().reshape(50000, -1)
X_test = X_test_tensor.numpy().reshape(10000, -1)

In [0]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, random_state=42, shuffle=True, stratify=y_train, test_size=0.2)

In [0]:
def save_file(file, filename):
  with open(filename, 'wb') as f:
    pkl.dump(file, f) 

def document_model(model, X_train, y_train, X_val, y_val):
  result = {'model': None, 'train_acc_by_epoch': None, 'val_acc_by_epoch': None, 
            'train_acc': None, 'val_acc': None, 'conf_matrix_train': None, 
            'conf_matrix_val': None, 'train_time': None}
  t0 = time.clock()
  train_acc_by_epoch, val_acc_by_epoch = model.fit_document_acc(X_train, y_train, X_val, y_val)
  t1 = time.clock()

  tdiff = t1 - t0
  result['train_time'] = tdiff

  # cost = model.fit(X_train, y_train)

  print("EVALUATING ACCURACY")
  train_acc = model.predict_return_acc(X_train, y_train)
  val_acc = model.predict_return_acc(X_val, y_val)

  print("GENERATING CONFUSION MATRIX")
  y_pred_train, y_true_train = model.predict(X_train, y_train)
  y_pred_val, y_true_val = model.predict(X_val, y_val)
  conf_matrix_train = confusion_matrix(y_true_train, y_pred_train)
  conf_matrix_val = confusion_matrix(y_true_val, y_pred_val)

  result['model'] = model
  result['train_acc_by_epoch'] = train_acc_by_epoch
  result['val_acc_by_epoch'] = val_acc_by_epoch
  result['train_acc'] = train_acc
  result['val_acc'] = val_acc
  result['conf_matrix_train'] = conf_matrix_train
  result['conf_matrix_val'] = conf_matrix_val
  return result


First, let's settle on an optimized architecture to use for future experiments.

Input Dimension: 3072

Output Dimension: 10


**Search Space**


Number of Layers: [1, 2, 3, 4, 5]

Hidden Nodes per Layer: [50-1000]



### Zero Layer NN

In [0]:
mlp_0 = MLP([3072, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

In [0]:
mlp_0_results = document_model(mlp_0, X_train, y_train, X_val, y_val)

EPOCH 0| TRAIN ACCURACY: 0.098525 | VAL ACCURACY: 0.0997
EVALUATING ACCURACY
GENERATING CONFUSION MATRIX


In [0]:
save_file(mlp_0_results, 'mlp_0.pkl')

KeyboardInterrupt: ignored

### One Layer NN

In [0]:
mlp_10 = MLP([3072, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

In [0]:
mlp_10_results = document_model(mlp_10, X_train, y_train, X_val, y_val)

EPOCH 0| TRAIN ACCURACY: 0.08715 | VAL ACCURACY: 0.0869
EPOCH 5 | TRAIN ACCURACY: 0.3164 | VAL ACCURACY: 0.3066
EPOCH 10 | TRAIN ACCURACY: 0.3413 | VAL ACCURACY: 0.3278
EPOCH 15 | TRAIN ACCURACY: 0.35545 | VAL ACCURACY: 0.3432
EPOCH 20 | TRAIN ACCURACY: 0.36965 | VAL ACCURACY: 0.3565
EPOCH 25 | TRAIN ACCURACY: 0.373 | VAL ACCURACY: 0.3607
EPOCH 30 | TRAIN ACCURACY: 0.38005 | VAL ACCURACY: 0.3666
EPOCH 35 | TRAIN ACCURACY: 0.38625 | VAL ACCURACY: 0.3729
EPOCH 40 | TRAIN ACCURACY: 0.391225 | VAL ACCURACY: 0.3817
EPOCH 45 | TRAIN ACCURACY: 0.396675 | VAL ACCURACY: 0.3866
EPOCH 50 | TRAIN ACCURACY: 0.3989 | VAL ACCURACY: 0.3911
EPOCH 55 | TRAIN ACCURACY: 0.399775 | VAL ACCURACY: 0.3883
EPOCH 60 | TRAIN ACCURACY: 0.403325 | VAL ACCURACY: 0.3916
EPOCH 65 | TRAIN ACCURACY: 0.405925 | VAL ACCURACY: 0.3916
EPOCH 70 | TRAIN ACCURACY: 0.408075 | VAL ACCURACY: 0.3949
EPOCH 75 | TRAIN ACCURACY: 0.410875 | VAL ACCURACY: 0.3932
EPOCH 80 | TRAIN ACCURACY: 0.41295 | VAL ACCURACY: 0.3947
EPOCH 85 | TRAI

KeyboardInterrupt: ignored

In [0]:
mlp_50 = MLP([3072, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

In [0]:
mlp_50_results = document_model(mlp_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_50_results, 'mlp_50.pkl')

In [0]:
mlp_100 = MLP([3072, 100, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

In [0]:
mlp_100_results = document_model(mlp_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_results, 'mlp_100.pkl')

In [0]:
mlp_200 = MLP([3072, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

In [0]:
mlp_200_results = document_model(mlp_200, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_results, 'mlp_200.pkl')

In [0]:
mlp_500 = MLP([3072, 500, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

In [0]:
mlp_500_results = document_model(mlp_500, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_results, 'mlp_500.pkl')

In [0]:
mlp_1000 = MLP([3072, 1000, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

In [0]:
mlp_1000_results = document_model(mlp_1000, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_1000_results, 'mlp_1000.pkl')

In [0]:
files.download('mlp_1000.pkl')

### Two Layer NN


In [0]:
mlp_10_10 = MLP([3072, 10, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_50_10 = MLP([3072, 50, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_50_50 = MLP([3072, 50, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_100_10 = MLP([3072, 100, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_100_50 = MLP([3072, 100, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_100_100 = MLP([3072, 100, 100, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_10 = MLP([3072, 200, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_50 = MLP([3072, 200, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_100 = MLP([3072, 200, 100, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_200 = MLP([3072, 200, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_500_10 = MLP([3072, 500, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_500_50 = MLP([3072, 500, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_500_100 = MLP([3072, 500, 100, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_500_200 = MLP([3072, 500, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_500_500 = MLP([3072, 500, 500, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

mlp_1000_10 = MLP([3072, 1000, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_1000_50 = MLP([3072, 1000, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_1000_100 = MLP([3072, 1000, 100, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_1000_200 = MLP([3072, 1000, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_1000_500 = MLP([3072, 1000, 500, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_1000_1000 = MLP([3072, 1000, 1000, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

In [0]:
mlp_10_10_results = document_model(mlp_10_10, X_train, y_train, X_val, y_val)

EPOCH 0| TRAIN ACCURACY: 0.10495 | VAL ACCURACY: 0.101
EPOCH 5 | TRAIN ACCURACY: 0.15525 | VAL ACCURACY: 0.1514
EPOCH 10 | TRAIN ACCURACY: 0.19515 | VAL ACCURACY: 0.1918
EPOCH 15 | TRAIN ACCURACY: 0.19925 | VAL ACCURACY: 0.1995
EPOCH 20 | TRAIN ACCURACY: 0.203575 | VAL ACCURACY: 0.2045
EPOCH 25 | TRAIN ACCURACY: 0.20825 | VAL ACCURACY: 0.2076
EPOCH 30 | TRAIN ACCURACY: 0.21165 | VAL ACCURACY: 0.209
EPOCH 35 | TRAIN ACCURACY: 0.219875 | VAL ACCURACY: 0.2207
EPOCH 40 | TRAIN ACCURACY: 0.2175 | VAL ACCURACY: 0.2163
EPOCH 45 | TRAIN ACCURACY: 0.245275 | VAL ACCURACY: 0.2466
EPOCH 50 | TRAIN ACCURACY: 0.25405 | VAL ACCURACY: 0.2523
EPOCH 55 | TRAIN ACCURACY: 0.256775 | VAL ACCURACY: 0.2562
EPOCH 60 | TRAIN ACCURACY: 0.25615 | VAL ACCURACY: 0.2565
EPOCH 65 | TRAIN ACCURACY: 0.255975 | VAL ACCURACY: 0.2542
EPOCH 70 | TRAIN ACCURACY: 0.2543 | VAL ACCURACY: 0.2563
EPOCH 75 | TRAIN ACCURACY: 0.2627 | VAL ACCURACY: 0.2612
EPOCH 80 | TRAIN ACCURACY: 0.251125 | VAL ACCURACY: 0.2509
EPOCH 85 | TRAIN

In [0]:
save_file(mlp_10_10_results, 'mlp_10_10.pkl')

In [0]:
files.download('mlp_10_10.pkl')

MessageError: ignored

In [0]:
mlp_50_10_results = document_model(mlp_50_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_50_10_results, 'mlp_50_10.pkl')

In [0]:
files.download('mlp_50_10.pkl')

In [0]:
mlp_50_50_results = document_model(mlp_50_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_50_50_results, 'mlp_50_50.pkl')

In [0]:
files.download('mlp_50_50.pkl')

In [0]:
mlp_100_10_results = document_model(mlp_100_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_10_results, 'mlp_100_10.pkl')

In [0]:
files.download('mlp_100_10.pkl')

In [0]:
mlp_100_50_results = document_model(mlp_100_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_50_results, 'mlp_100_50.pkl')

In [0]:
files.download('mlp_100_50.pkl')

In [0]:
mlp_100_100_results = document_model(mlp_100_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_100_results, 'mlp_100_100.pkl')

In [0]:
files.download('mlp_100_100.pkl')

In [0]:
mlp_200_10_results = document_model(mlp_200_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_10_results, 'mlp_200_10.pkl')

In [0]:
files.download('mlp_200_10.pkl')

In [0]:
mlp_200_50_results = document_model(mlp_200_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_50_results, 'mlp_200_50.pkl')

In [0]:
files.download('mlp_200_50.pkl')

In [0]:
mlp_200_100_results = document_model(mlp_200_100, X_train, y_train, X_val, y_val)

EPOCH 0| TRAIN ACCURACY: 0.105625 | VAL ACCURACY: 0.1043
EPOCH 5 | TRAIN ACCURACY: 0.322775 | VAL ACCURACY: 0.3142
EPOCH 10 | TRAIN ACCURACY: 0.350675 | VAL ACCURACY: 0.344
EPOCH 15 | TRAIN ACCURACY: 0.3711 | VAL ACCURACY: 0.3671
EPOCH 20 | TRAIN ACCURACY: 0.38175 | VAL ACCURACY: 0.3758
EPOCH 25 | TRAIN ACCURACY: 0.39075 | VAL ACCURACY: 0.3881
EPOCH 30 | TRAIN ACCURACY: 0.39555 | VAL ACCURACY: 0.3928
EPOCH 35 | TRAIN ACCURACY: 0.40525 | VAL ACCURACY: 0.3983
EPOCH 40 | TRAIN ACCURACY: 0.41105 | VAL ACCURACY: 0.4053
EPOCH 45 | TRAIN ACCURACY: 0.415925 | VAL ACCURACY: 0.4105
EPOCH 50 | TRAIN ACCURACY: 0.422375 | VAL ACCURACY: 0.4156
EPOCH 55 | TRAIN ACCURACY: 0.42555 | VAL ACCURACY: 0.419
EPOCH 60 | TRAIN ACCURACY: 0.43075 | VAL ACCURACY: 0.4244
EPOCH 65 | TRAIN ACCURACY: 0.4362 | VAL ACCURACY: 0.4277
EPOCH 70 | TRAIN ACCURACY: 0.4388 | VAL ACCURACY: 0.4302
EPOCH 75 | TRAIN ACCURACY: 0.443525 | VAL ACCURACY: 0.4332
EPOCH 80 | TRAIN ACCURACY: 0.448475 | VAL ACCURACY: 0.4351
EPOCH 85 | TRAI

In [0]:
save_file(mlp_200_100_results, 'mlp_200_100.pkl')

In [0]:
files.download('mlp_200_100.pkl')

----------------------------------------
Exception happened during processing of request from ('::ffff:127.0.0.1', 42362, 0, 0)
Traceback (most recent call last):
  File "/usr/lib/python3.6/socketserver.py", line 320, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/usr/lib/python3.6/socketserver.py", line 351, in process_request
    self.finish_request(request, client_address)
  File "/usr/lib/python3.6/socketserver.py", line 364, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/usr/lib/python3.6/socketserver.py", line 724, in __init__
    self.handle()
  File "/usr/lib/python3.6/http/server.py", line 418, in handle
    self.handle_one_request()
  File "/usr/lib/python3.6/http/server.py", line 406, in handle_one_request
    method()
  File "/usr/lib/python3.6/http/server.py", line 639, in do_GET
    self.copyfile(f, self.wfile)
  File "/usr/lib/python3.6/http/server.py", line 800, in copyfile
    shutil.copyfil

KeyboardInterrupt: ignored

In [0]:
mlp_200_200_results = document_model(mlp_200_200, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_results, 'mlp_200_200.pkl')

In [0]:
files.download('mlp_200_200.pkl')

In [0]:
mlp_500_10_results = document_model(mlp_500_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_10_results, 'mlp_500_10.pkl')

In [0]:
files.download('mlp_500_10.pkl')

In [0]:
mlp_500_50_results = document_model(mlp_500_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_50_results, 'mlp_500_50.pkl')

In [0]:
files.download('mlp_500_50.pkl')

In [0]:
mlp_500_100_results = document_model(mlp_500_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_100_results, 'mlp_500_100.pkl')

In [0]:
files.download('mlp_500_100.pkl')

In [0]:
mlp_500_200_results = document_model(mlp_500_200, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_200_results, 'mlp_500_200.pkl')

In [0]:
files.download('mlp_500_200.pkl')

In [0]:
mlp_500_500_results = document_model(mlp_500_500, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_500_results, 'mlp_500_500.pkl')

In [0]:
files.download('mlp_500_500.pkl')

In [0]:
mlp_1000_10_results = document_model(mlp_1000_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_1000_10_results, 'mlp_1000_10.pkl')

In [0]:
files.download('mlp_1000_10.pkl')

In [0]:
mlp_1000_50_results = document_model(mlp_1000_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_1000_50_results, 'mlp_1000_50.pkl')

In [0]:
files.download('mlp_1000_50.pkl')

In [0]:
mlp_1000_100_results = document_model(mlp_1000_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_1000_100_results, 'mlp_1000_100.pkl')

In [0]:
files.download('mlp_1000_100.pkl')

In [0]:
mlp_1000_200_results = document_model(mlp_1000_200, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_1000_200_results, 'mlp_1000_200.pkl')

In [0]:
files.download('mlp_1000_200.pkl')

In [0]:
mlp_1000_500_results = document_model(mlp_1000_500, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_1000_500_results, 'mlp_1000_500.pkl')

In [0]:
files.download('mlp_1000_500.pkl')

In [0]:
mlp_1000_1000_results = document_model(mlp_1000_1000, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_1000_1000_results, 'mlp_1000_1000.pkl')

In [0]:
files.download('mlp_1000_1000.pkl')

### Three Layer NN

In [0]:
mlp_10_10_10 = MLP([3072, 10, 10, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_50_10_10 = MLP([3072, 50, 10, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_50_50_10 = MLP([3072, 50, 50, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_50_50_50 = MLP([3072, 50, 50, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_100_10_10 = MLP([3072, 100, 10, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_100_50_10 = MLP([3072, 100, 50, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_100_50_50 = MLP([3072, 100, 50, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_100_100_10 = MLP([3072, 100, 100, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_100_100_50 = MLP([3072, 100, 100, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_100_100_100 = MLP([3072, 100, 100, 100, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_10_10 = MLP([3072, 200, 10, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_50_10 = MLP([3072, 200, 50, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_50_50 = MLP([3072, 200, 50, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_100_10 = MLP([3072, 200, 100, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_100_50 = MLP([3072, 200, 100, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_100_100 = MLP([3072, 200, 100, 100, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_200_10 = MLP([3072, 200, 200, 10, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_200_50 = MLP([3072, 200, 200, 50, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_200_100 = MLP([3072, 200, 200, 100, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_200_200 = MLP([3072, 200, 200, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)

mlp_500_50_50 = MLP([3072, 500, 50, 50, 10], 2000, 1024, 0.05, 'relu', 'softmax', 0.5)
mlp_500_100_50 = MLP([3072, 500, 100, 50, 10], 2000, 1024, 0.05, 'relu', 'softmax', 0.5)
mlp_500_100_100 = MLP([3072, 500, 100, 100, 10], 2000, 1024, 0.05, 'relu', 'softmax', 0.5)
mlp_500_200_50 = MLP([3072, 500, 200, 50, 10], 3000, 1024, 0.05, 'relu', 'softmax', 0.5)
mlp_500_200_100 = MLP([3072, 500, 200, 100, 10], 3000, 1024, 0.05, 'relu', 'softmax', 0.5)
mlp_500_200_200 = MLP([3072, 500, 200, 200, 10], 3000, 1024, 0.05, 'relu', 'softmax', 0.5)
mlp_500_500_50 = MLP([3072, 500, 500, 50, 10], 3000, 1024, 0.05, 'relu', 'softmax', 0.5)
mlp_500_500_100 = MLP([3072, 500, 500, 100, 10], 3000, 1024, 0.05, 'relu', 'softmax', 0.5)
mlp_500_500_200 = MLP([3072, 500, 500, 200, 10], 3000, 1024, 0.05, 'relu', 'softmax', 0.5)
mlp_500_500_500 = MLP([3072, 500, 500, 500, 10], 3000, 1024, 0.05, 'relu', 'softmax', 0.5)






In [0]:
mlp_10_10_10_results = document_model(mlp_10_10_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_10_10_10_results, 'mlp_10_10_10.pkl')

In [0]:
files.download('mlp_10_10_10.pkl')

In [0]:
mlp_50_10_10_results = document_model(mlp_50_10_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_50_10_10_results, 'mlp_50_10_10.pkl')

In [0]:
files.download('mlp_50_10_10.pkl')

In [0]:
mlp_50_50_10_results = document_model(mlp_50_50_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_50_50_10_results, 'mlp_50_50_10.pkl')

In [0]:
files.download('mlp_50_50_10.pkl')

In [0]:
mlp_50_50_50_results = document_model(mlp_50_50_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_50_50_50_results, 'mlp_50_50_50.pkl')

In [0]:
files.download('mlp_50_50_50.pkl')

In [0]:
mlp_100_10_10_results = document_model(mlp_100_10_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_10_10_results, 'mlp_100_10_10.pkl')

In [0]:
files.download('mlp_100_10_10.pkl')

In [0]:
mlp_100_50_10_results = document_model(mlp_100_50_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_50_10_results, 'mlp_100_50_10.pkl')

In [0]:
files.download('mlp_100_50_10.pkl')

In [0]:
mlp_100_50_50_results = document_model(mlp_100_50_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_50_50_results, 'mlp_100_50_50.pkl')

In [0]:
files.download('mlp_100_50_50.pkl')

In [0]:
mlp_100_100_10_results = document_model(mlp_100_100_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_100_10_results, 'mlp_100_100_10.pkl')

In [0]:
files.download('mlp_100_100_10.pkl')

In [0]:
mlp_100_100_50_results = document_model(mlp_100_100_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_100_50_results, 'mlp_100_100_50.pkl')

In [0]:
files.download('mlp_100_100_50.pkl')

In [0]:
mlp_100_100_100_results = document_model(mlp_100_100_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_100_100_100_results, 'mlp_100_100_100.pkl')

In [0]:
files.download('mlp_100_100_100.pkl')

In [0]:
mlp_200_10_10_results = document_model(mlp_200_10_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_10_10_results, 'mlp_200_10_10.pkl')

In [0]:
files.download('mlp_200_10_10.pkl')

In [0]:
mlp_200_50_10_results = document_model(mlp_200_50_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_50_10_results, 'mlp_200_50_10.pkl')

In [0]:
files.download('mlp_200_50_10.pkl')

In [0]:
mlp_200_50_50_results = document_model(mlp_200_50_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_50_50_results, 'mlp_200_50_50.pkl')

In [0]:
files.download('mlp_200_50_50.pkl')

In [0]:
mlp_200_100_10_results = document_model(mlp_200_100_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_100_10_results, 'mlp_200_100_10.pkl')

In [0]:
files.download('mlp_200_100_10.pkl')

In [0]:
mlp_200_100_50_results = document_model(mlp_200_100_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_100_50_results, 'mlp_200_100_50.pkl')

In [0]:
files.download('mlp_200_100_50.pkl')

In [0]:
mlp_200_100_100_results = document_model(mlp_200_100_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_100_100_results, 'mlp_200_100_100.pkl')

In [0]:
files.download('mlp_200_100_100.pkl')

In [0]:
mlp_200_200_10_results = document_model(mlp_200_200_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_10_results, 'mlp_200_200_10.pkl')

In [0]:
files.download('mlp_200_200_10.pkl')

In [0]:
mlp_200_200_50_results = document_model(mlp_200_200_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_50_results, 'mlp_200_200_50.pkl')

In [0]:
files.download('mlp_200_200_50.pkl')

In [0]:
mlp_200_200_100_results = document_model(mlp_200_200_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_100_results, 'mlp_200_200_100.pkl')

In [0]:
files.download('mlp_200_200_100.pkl')

In [0]:
mlp_200_200_200_results = document_model(mlp_200_200_200, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_200_results, 'mlp_200_200_200.pkl')

In [0]:
files.download('mlp_200_200_200.pkl')

In [0]:
mlp_500_10_10_results = document_model(mlp_500_10_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_10_10_results, 'mlp_500_10_10.pkl')

In [0]:
files.download('mlp_500_10_10.pkl')

In [0]:
mlp_500_50_10_results = document_model(mlp_500_50_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_50_10_results, 'mlp_500_50_10.pkl')

In [0]:
files.download('mlp_500_50_10.pkl')

In [0]:
mlp_500_50_50_results = document_model(mlp_500_50_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_50_50_results, 'mlp_500_50_50.pkl')

In [0]:
files.download('mlp_500_50_50.pkl')

In [0]:
mlp_500_100_10_results = document_model(mlp_500_100_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_100_10_results, 'mlp_500_100_10.pkl')

In [0]:
files.download('mlp_500_100_10.pkl')

In [0]:
mlp_500_100_50_results = document_model(mlp_500_100_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_100_50_results, 'mlp_500_100_50.pkl')

In [0]:
files.download('mlp_500_100_50.pkl')

In [0]:
mlp_500_100_100_results = document_model(mlp_500_100_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_100_100_results, 'mlp_500_100_100.pkl')

In [0]:
files.download('mlp_500_100_100.pkl')

In [0]:
mlp_500_200_10_results = document_model(mlp_500_200_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_200_10_results, 'mlp_500_200_10.pkl')

In [0]:
files.download('mlp_500_200_10.pkl')

In [0]:
mlp_500_200_50_results = document_model(mlp_500_200_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_200_50_results, 'mlp_500_200_50.pkl')

In [0]:
files.download('mlp_500_200_50.pkl')

In [0]:
mlp_500_200_100_results = document_model(mlp_500_200_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_200_100_results, 'mlp_500_200_100.pkl')

In [0]:
files.download('mlp_500_200_100.pkl')

In [0]:
mlp_500_200_200_results = document_model(mlp_500_200_200, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_200_200_results, 'mlp_500_200_200.pkl')

In [0]:
files.download('mlp_500_200_200.pkl')

In [0]:
mlp_500_500_10_results = document_model(mlp_500_500_10, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_500_10_results, 'mlp_500_500_10.pkl')

In [0]:
files.download('mlp_500_500_10.pkl')

In [0]:
mlp_500_500_50_results = document_model(mlp_500_500_50, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_500_50_results, 'mlp_500_500_50.pkl')

In [0]:
files.download('mlp_500_500_50.pkl')

In [0]:
mlp_500_500_100_results = document_model(mlp_500_500_100, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_500_100_results, 'mlp_500_500_100.pkl')

In [0]:
files.download('mlp_500_500_100.pkl')

In [0]:
mlp_500_500_200_results = document_model(mlp_500_500_200, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_500_200_results, 'mlp_500_500_200.pkl')

In [0]:
files.download('mlp_500_500_200.pkl')

In [0]:
mlp_500_500_500_results = document_model(mlp_500_500_500, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_500_500_500_results, 'mlp_500_500_500.pkl')

In [0]:
files.download('mlp_500_500_500.pkl')

### Dropout

In [0]:
mlp_200_200_0 = MLP([3072, 200, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0)
mlp_200_200_02 = MLP([3072, 200, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.2)
mlp_200_200_04 = MLP([3072, 200, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.4)
mlp_200_200_06 = MLP([3072, 200, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.6)
mlp_200_200_08 = MLP([3072, 200, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.8)

In [0]:
mlp_200_200_0_results = document_model(mlp_200_200_0, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_0_results, 'mlp_200_200_0.pkl')

In [0]:
mlp_200_200_02_results = document_model(mlp_200_200_02, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_02_results, 'mlp_200_200_02.pkl')

In [0]:
mlp_200_200_04_results = document_model(mlp_200_200_04, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_04_results, 'mlp_200_200_04.pkl')

In [0]:
mlp_200_200_06_results = document_model(mlp_200_200_06, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_06_results, 'mlp_200_200_06.pkl')

In [0]:
mlp_200_200_08_results = document_model(mlp_200_200_08, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_08_results, 'mlp_200_200_08.pkl')

### Activation

In [0]:
mlp_200_200_relu =  MLP([3072, 200, 200, 10], 1000, 1024, 0.1, 'relu', 'softmax', 0.5)
mlp_200_200_leakyrelu =  MLP([3072, 200, 200, 10], 1000, 1024, 0.1, 'leaky_relu', 'softmax', 0.5)
mlp_200_200_sigmoid =  MLP([3072, 200, 200, 10], 1000, 1024, 0.1, 'sigmoid', 'softmax', 0.5)

In [0]:
mlp_200_200_relu_results = document_model(mlp_200_200_relu, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_relu_results, 'mlp_200_200_relu.pkl')

In [0]:
mlp_200_200_leakyrelu_results = document_model(mlp_200_200_leakyrelu, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_leakyrelu_results, 'mlp_200_200_leakyrelu.pkl')

In [0]:
mlp_200_200_sigmoid_results = document_model(mlp_200_200_sigmoid, X_train, y_train, X_val, y_val)

In [0]:
save_file(mlp_200_200_sigmoid_results, 'mlp_200_200_sigmoid.pkl')

# CNN Implementation


In [0]:
#testing code

class Swish(nn.Module):
    def forward(self, input_tensor):
        return input_tensor * torch.sigmoid(input_tensor)

In [0]:
epochfunctrain = []
epochfunctest = [] #arrays to store results
def accuracyepoch (epoch, test):
  correct = 0
  total = 0 #temp variables to store results
  with torch.no_grad():
      for data in test: #extracting data
          images, labels = data 
          outputs = net(images) #running conv net
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)   #adding label size to total
          correct += (predicted == labels).sum().item() #if label is predicted than incr correct

 
  if test==trainloader: #appending arrays depending on train or test
    epochfunctrain.append(100*correct/total)
    print('Accuracy: %d %%' % (
      100 * correct / total)+ ' epoch of train ' + str(epoch+1)) #printing results
  elif test==testloader:
    print('Accuracy: %d %%' % (
      100 * correct / total)+ ' epoch of test ' + str(epoch+1)) #printing results
    (epochfunctest.append(100*correct/total))

In [0]:


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()   
        self.conv1 = nn.Conv2d(3, 6, 5) #Convolutional Layer 1, f=5, increase filters from 3 to 6.
        self.pool = nn.MaxPool2d(2, 2) #Pooling Layer, used multiple times it is a 2X2 Layer
        self.conv2 = nn.Conv2d(6, 16, 5) #Convolutional Layer 2, f=5, increase filters from 6 to 16.
        self.fc1 = nn.Linear(16 * 5 * 5, 120) #Apply linear transformation to data Input: 400, output 120
        self.fc2 = nn.Linear(120, 84) #Linear transformation, input 120, output 84
        self.fc3 = nn.Linear(84, 10)  #Linear transformation, input 84, output 10
        
    def forward(self, x):
        m=Swish()
        swish=m.forward
        x = self.pool(swish(self.conv1(x))) #Layer 1: Conv Layer 1, with Relu as function, 
                                              #then we apply 2x2 pooling
        x = self.pool(swish(self.conv2(x))) #Layer 2: Conv Layer 2, with Relu as function, 
                                              #then we apply 2x2 pooling                                     
        x = x.view(-1, 16 * 5 * 5)
        x = swish(self.fc1(x))
        x = swish(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [0]:


criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [26]:
for epoch in range(50):  # loop over the dataset multiple times 
    

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
            
    accuracyepoch(epoch, testloader)
    accuracyepoch(epoch, trainloader)  
        

print('Finished Training')

[1,  2000] loss: 0.468
[1,  4000] loss: 0.490
[1,  6000] loss: 0.530
[1,  8000] loss: 0.535
[1, 10000] loss: 0.536
[1, 12000] loss: 0.561
Accuracy: 61 % epoch of test 1
Accuracy: 84 % epoch of train 1
[2,  2000] loss: 0.436
[2,  4000] loss: 0.499
[2,  6000] loss: 0.501
[2,  8000] loss: 0.537
[2, 10000] loss: 0.563
[2, 12000] loss: 0.575
Accuracy: 59 % epoch of test 2
Accuracy: 80 % epoch of train 2
[3,  2000] loss: 0.442
[3,  4000] loss: 0.467
[3,  6000] loss: 0.503
[3,  8000] loss: 0.543
[3, 10000] loss: 0.549
[3, 12000] loss: 0.553
Accuracy: 61 % epoch of test 3
Accuracy: 85 % epoch of train 3
[4,  2000] loss: 0.433
[4,  4000] loss: 0.462
[4,  6000] loss: 0.511
[4,  8000] loss: 0.528
[4, 10000] loss: 0.540
[4, 12000] loss: 0.598
Accuracy: 59 % epoch of test 4
Accuracy: 81 % epoch of train 4
[5,  2000] loss: 0.424
[5,  4000] loss: 0.479
[5,  6000] loss: 0.493
[5,  8000] loss: 0.525
[5, 10000] loss: 0.528
[5, 12000] loss: 0.564
Accuracy: 60 % epoch of test 5
Accuracy: 84 % epoch of tra

In [0]:
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

In [28]:
net = Net()
net.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [0]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 64 %
Accuracy of   car : 73 %
Accuracy of  bird : 43 %
Accuracy of   cat : 43 %
Accuracy of  deer : 56 %
Accuracy of   dog : 53 %
Accuracy of  frog : 62 %
Accuracy of horse : 65 %
Accuracy of  ship : 77 %
Accuracy of truck : 66 %


In [29]:
print("Accuracy Train:")
print('\n'.join(map(str, epochfunctrain)))
print('\n'+'[epoch,accuracy]:')
for x in range(len(epochfunctrain)): 
    print ('\n'+'['+str(x+1)+','+ str(epochfunctrain[x])+']') 

Accuracy Train:
65.32
65.64
70.464
72.784
74.06
76.272
77.664
78.784
79.28
78.716
77.686
79.998
80.864
82.696
78.908
80.318
84.034
84.908
84.49
84.632
84.76
80.866
85.464
81.7
84.078
84.37
85.264
84.346
85.444
85.614
86.092
86.404
85.59
85.66
73.776
85.816
81.444
85.916
87.156
86.128
84.65
85.304
87.012
85.202
83.668
85.126
84.478
86.222
85.7
83.25
79.818
85.996
84.298
84.576
77.58
85.086
84.782
82.644
83.672
83.782
82.758
84.028
81.866
82.256
86.096
80.748
86.61
82.78
85.124
86.006

[epoch,accuracy]:

[1,65.32]

[2,65.64]

[3,70.464]

[4,72.784]

[5,74.06]

[6,76.272]

[7,77.664]

[8,78.784]

[9,79.28]

[10,78.716]

[11,77.686]

[12,79.998]

[13,80.864]

[14,82.696]

[15,78.908]

[16,80.318]

[17,84.034]

[18,84.908]

[19,84.49]

[20,84.632]

[21,84.76]

[22,80.866]

[23,85.464]

[24,81.7]

[25,84.078]

[26,84.37]

[27,85.264]

[28,84.346]

[29,85.444]

[30,85.614]

[31,86.092]

[32,86.404]

[33,85.59]

[34,85.66]

[35,73.776]

[36,85.816]

[37,81.444]

[38,85.916]

[39,87.156]

[40,8

In [30]:
print("Accuracy Test:")
print('\n'.join(map(str, epochfunctest)))
print('\n'+'[epoch,accuracy]:')
for x in range(len(epochfunctest)): 
    print ('\n'+'['+str(x+1)+','+ str(epochfunctest[x])+']') 

Accuracy Test:
61.0
60.76
63.63
63.59
64.38
64.31
64.9
64.49
63.46
62.29
60.83
62.55
62.8
63.11
60.9
61.02
62.02
62.58
62.32
62.05
61.48
59.12
61.12
59.78
60.26
59.58
59.86
60.5
60.45
59.97
60.0
60.74
60.22
60.1
54.67
59.88
57.67
58.86
60.63
59.57
58.15
59.17
59.58
59.6
59.23
58.72
58.23
58.81
59.62
58.89
57.02
58.95
57.86
58.95
56.33
58.43
58.03
58.52
58.7
58.5
57.89
58.26
57.75
57.55
58.98
57.06
59.53
58.29
58.99
58.16

[epoch,accuracy]:

[1,61.0]

[2,60.76]

[3,63.63]

[4,63.59]

[5,64.38]

[6,64.31]

[7,64.9]

[8,64.49]

[9,63.46]

[10,62.29]

[11,60.83]

[12,62.55]

[13,62.8]

[14,63.11]

[15,60.9]

[16,61.02]

[17,62.02]

[18,62.58]

[19,62.32]

[20,62.05]

[21,61.48]

[22,59.12]

[23,61.12]

[24,59.78]

[25,60.26]

[26,59.58]

[27,59.86]

[28,60.5]

[29,60.45]

[30,59.97]

[31,60.0]

[32,60.74]

[33,60.22]

[34,60.1]

[35,54.67]

[36,59.88]

[37,57.67]

[38,58.86]

[39,60.63]

[40,59.57]

[41,58.15]

[42,59.17]

[43,59.58]

[44,59.6]

[45,59.23]

[46,58.72]

[47,58.23]

[48,58.8