# Extension 03 // Add Skip Link Blocks to Neural Network

In [0]:
# set global Hyperparameters 
epochs = 10

## Load Both MNIST and CIFAR Dataset to temp-data in Google Drive

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
from pathlib import Path
import os
import requests
import pickle

def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

os.chdir('/content/drive/My Drive')
print(os.getcwd())

# Move into primary Drive Folder
os.chdir('/content/drive/My Drive')

# Make temp-data directory and move inside
path_name = 'temp-data/'
Path("temp-data/").mkdir(parents=True, exist_ok=True)
os.chdir(path_name)
print('\nData directory created...')

print('Download starting...')
url = 'https://github.com/aobject/public-nyu-ml/blob/master/ML-Project/Data/cifar-10.p?raw=true'
target_path = 'cifar-10.p'
response = requests.get(url, stream=True)
if response.status_code == 200:
    with open(target_path, 'wb') as f:
        f.write(response.raw.read())

url = 'https://github.com/aobject/public-nyu-ml/blob/master/ML-Project/Data/mnist.p?raw=true'
target_path = 'mnist.p'
response = requests.get(url, stream=True)
if response.status_code == 200:
    with open(target_path, 'wb') as f:
        f.write(response.raw.read())
print('\nThe files in the data folder include:')
path = '.'
files = os.listdir(path)
for name in files:
    print(name)

digits = unpickle('mnist.p')
print('\nDigits data shape is {}'.format(digits['data'].shape))
cifar = unpickle('cifar-10.p')
cifar[b'data'] = cifar[b'data'][:1000]  # mabye increase to 1800 later
cifar[b'labels'] = cifar[b'labels'][:1000]
print('cifar data shape is {}'.format(cifar[b'data'].shape))

print('\nAll done, your data is ready to go!!!\n')

/content/drive/My Drive

Data directory created...
Download starting...

The files in the data folder include:
mnist.p
cifar-10.p

Digits data shape is (1797, 64)
cifar data shape is (1000, 3072)

All done, your data is ready to go!!!



## Part A ( i ) Baseline on Dataset I

## Vanilla Neural Network
## This is the example from the HW Notebook only modified to add more data collection

The base strategy we are going to start with is Neural Networks. This topic was was covered in the week 8. In this first section I will include the code given in homework 8. The performance will give us a benchmark we can work from. 

Our extension to the code in homework 8 will be to add a convolutional layers to the neural network. First, we will use a PyTorch implementation. Then, we will write our own implementation. 

Here is the Neural Network implemented in homework 8. 

In [0]:
# Import Libraries
from sklearn.preprocessing import StandardScaler  # It is important in neural networks to scale the date
from sklearn.model_selection import train_test_split  # The standard - train/test to prevent overfitting and choose hyperparameters
from sklearn.metrics import accuracy_score # 
import numpy as np
import numpy.random as r # We will randomly initialize our weights
import matplotlib.pyplot as plt 
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, HoverTool, NumeralTickFormatter, Legend
from bokeh.io import output_notebook
output_notebook()

X = digits['data']
print("The shape of the digits dataset:") 
print(digits['data'].shape)
y = digits.target

X_scale = StandardScaler()
X = X_scale.fit_transform(X)

#Split the data into training and test set.  60% training and %40 test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

def convert_y_to_vect(y):
    y_vect = np.zeros((len(y), 10))
    for i in range(len(y)):
        y_vect[i, y[i]] = 1
    return y_vect


# convert digits to vectors
y_v_train = convert_y_to_vect(y_train)
y_v_test = convert_y_to_vect(y_test)

def f(z):
    return 1 / (1 + np.exp(-z))


def f_deriv(z):
    return f(z) * (1 - f(z))

def setup_and_init_weights(nn_structure):
    W = {} #creating a dictionary i.e. a set of key: value pairs
    b = {}
    for l in range(1, len(nn_structure)):
        W[l] = r.random_sample((nn_structure[l], nn_structure[l-1])) #Return “continuous uniform” random floats in the half-open interval [0.0, 1.0). 
        b[l] = r.random_sample((nn_structure[l],))
    return W, b

def init_tri_values(nn_structure):
    tri_W = {}
    tri_b = {}
    for l in range(1, len(nn_structure)):
        tri_W[l] = np.zeros((nn_structure[l], nn_structure[l-1]))
        tri_b[l] = np.zeros((nn_structure[l],))
    return tri_W, tri_b

def feed_forward(x, W, b):
    a = {1: x} # create a dictionary for holding the a values for all levels
    z = { } # create a dictionary for holding the z values for all the layers
    for l in range(1, len(W) + 1): # for each layer
        node_in = a[l]
        z[l+1] = W[l].dot(node_in) + b[l]  # z^(l+1) = W^(l)*a^(l) + b^(l)
        # import pdb; pdb.set_trace()
        a[l+1] = f(z[l+1]) # a^(l+1) = f(z^(l+1))
    return a, z

def calculate_out_layer_delta(y, a_out, z_out):
    # delta^(nl) = -(y_i - a_i^(nl)) * f'(z_i^(nl))
    return -(y-a_out) * f_deriv(z_out) 


def calculate_hidden_delta(delta_plus_1, w_l, z_l):
    # delta^(l) = (transpose(W^(l)) * delta^(l+1)) * f'(z^(l))
    return np.dot(np.transpose(w_l), delta_plus_1) * f_deriv(z_l)

def predict_y(W, b, X, n_layers):
    N = X.shape[0]
    y = np.zeros((N,))
    for i in range(N):
        a, z = feed_forward(X[i, :], W, b)
        y[i] = np.argmax(a[n_layers])
    return y

def train_nn(nn_structure, X_train, y_train, X_test, y_test, iter_num=100, alpha=0.25):
    W, b = setup_and_init_weights(nn_structure)
    cnt = 0
    N = len(y_train)
    avg_cost_func = []

    loss_train_seq = []
    loss_test_seq = []
    acc_train_seq = []
    acc_test_seq = []
    train_examples = len(y_train)
    test_examples = len(y_test)

    print('Starting gradient descent for {} iterations'.format(iter_num))
    while cnt < iter_num:
        if cnt%1000 == 0:
            print('Epoch {} of {}'.format(cnt, iter_num))
        tri_W, tri_b = init_tri_values(nn_structure)
        avg_cost = 0

        loss_train = 0.0
        loss_test = 0.0
        acc_train = 0.0
        acc_test = 0.0


        for i in range(N):
            delta = {}
            # perform the feed forward pass and return the stored a and z values, to be used in the
            # gradient descent step
            a, z = feed_forward(X_train[i, :], W, b)
            # loop from nl-1 to 1 backpropagating the errors
            for l in range(len(nn_structure), 0, -1):
                if l == len(nn_structure):
                    delta[l] = calculate_out_layer_delta(y_train[i,:], a[l], z[l])
                    loss_train += np.linalg.norm((y_train[i,:]-a[l]))
                    if np.argmax(a[len(nn_structure)]) == np.argmax(y_v_train[i]):
                      acc_train += 1.0
                else:
                    if l > 1:
                        delta[l] = calculate_hidden_delta(delta[l+1], W[l], z[l])
                    # triW^(l) = triW^(l) + delta^(l+1) * transpose(a^(l))
                    tri_W[l] += np.dot(delta[l+1][:,np.newaxis], np.transpose(a[l][:,np.newaxis]))# np.newaxis increase the number of dimensions
                    # trib^(l) = trib^(l) + delta^(l+1)
                    tri_b[l] += delta[l+1]
        # perform the gradient descent step for the weights in each layer
        for l in range(len(nn_structure) - 1, 0, -1):
            W[l] += -alpha * (1.0/N * tri_W[l])
            b[l] += -alpha * (1.0/N * tri_b[l])
        
        # Test model

        # get y prediction
        # y_pred = predict_y(W, b, X_test, 3)
        
        for i in range(test_examples):
         
            a, z = feed_forward(X_test[i, :], W, b)
            # calculate loss
            loss_test += np.linalg.norm((y_test[i,:]-a[len(nn_structure)]))
            # calculate accuracy
            if np.argmax(a[len(nn_structure)]) == np.argmax(y_v_test[i]):
                acc_test += 1
            
        # complete the average cost calculation
        loss_train_seq.append(loss_train / train_examples)
        loss_test_seq.append(loss_test / test_examples)
        acc_train_seq.append(acc_train / train_examples)
        acc_test_seq.append(acc_test / test_examples)
        cnt += 1
    return W, b, (loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)


nn_structure = [64, 30, 10]
    
# train the NN
%time W, b, result_baseline_hwcode_mnist = train_nn(nn_structure, X_train, y_v_train, X_test, y_v_test, epochs, 0.25)

def plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq):
    print('The test set prediction accuracy is {}%'.format(acc_test_seq[-1] * 100))
    # import pdb; pdb.set_trace()
    source = ColumnDataSource(data={
        'epoch'            : range(1, len(loss_test_seq) + 1),
        'train_loss'    : loss_train_seq,
        'test_loss'        :  loss_test_seq,
    })

    p = figure(title='MNIST Loss: NN without CNN provide in HW', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_loss', color='#329fe3', line_alpha=0.8 , line_width=2, legend_label="Train", source=source)
    p.line(x='epoch', y='test_loss', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test", source=source)
    p.legend.location = "top_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Loss' 

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('Training Loss', '@train_loss{0.000 a}'),
                  ('Test Loss', '@test_loss{0.000 a}'),
        ],

        mode='mouse'
    ))

    show(p)

    source = ColumnDataSource(data={
        'epoch'            : range(1, len(acc_train_seq) + 1),
        'train_acc'    : np.array(acc_train_seq),
        'test_acc'        :  np.array(acc_test_seq),
    })

    p = figure(title='MNIST Accuracy: NN without CNN provided in HW', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_acc', color='#329fe3', line_alpha=0.8 , line_width=2, legend_label="Train", source=source)
    p.line(x='epoch', y='test_acc', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test", source=source)
    p.legend.location = "bottom_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Accuracy' 
    p.yaxis.formatter = NumeralTickFormatter(format='0 %')

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('Training Accuracy', '@{train_acc}{%0.2f}'),
                  ('Test Accuracy', '@{test_acc}{%0.2f}'),
        ],

        mode='mouse'
    ))

    show(p)

loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq = result_baseline_hwcode_mnist

plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)

The shape of the digits dataset:
(1797, 64)
Starting gradient descent for 10 iterations
Epoch 0 of 10
CPU times: user 1.14 s, sys: 0 ns, total: 1.14 s
Wall time: 1.14 s
The test set prediction accuracy is 10.43115438108484%


## Part ( i ) Continued: A Better Baseline
## Baseline MNIST Neural Network implimented to PyTorch without CNN Layers

In addition, I have implimented a baseline implimentation of a Neural Network without CNN Layers

In [0]:
# Import modules
import torch
import numpy as np
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
from sklearn.datasets import load_digits # The MNIST data set is in scikit learn data set
from sklearn.preprocessing import StandardScaler  # It is important in neural networks to scale the date
from sklearn.model_selection import train_test_split  # The standard - train/test to prevent overfitting and choose hyperparameters
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.models.formatters import NumeralTickFormatter
from bokeh.io import output_notebook
output_notebook()

class MNISTDataset(Dataset):
    def __init__(self, data, label):
        self.data = data.reshape((-1,8,8,1))
        self.label = label

    def __len__(self):
        return len(self.label)

    def __getitem__(self, item):

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        image = self.data[item].transpose((2, 0, 1))
        image = torch.from_numpy(image)
        target = self.label[item]
        target = torch.from_numpy(target)
        return (image, target)


class MNIST(nn.Module):

    # Our batch shape for input x is (1, 8, 8)

    def __init__(self):
        super(MNIST, self).__init__()

        self.fc1 = nn.Linear(64, 30)
        self.fc2 = nn.Linear(30, 10)

    def forward(self, x):

        x = x.view(-1, 8 * 8)
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x


def scorecard(output, labels):
    score = 0
    score = np.where(np.argmax(output, axis=1) == np.argmax(labels, axis=1), 1, 0)
    score = np.sum(score)
    return score



def trainModel(model, batch_size, num_epochs, learning_rate):
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", num_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)

    X = digits['data']
    y = digits['target']

    X_scale = StandardScaler()
    X = X_scale.fit_transform(X)

    # Split the data into training and test set.  60% training and %40 test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    def convert_y_to_vect(y):
        y_vect = np.zeros((len(y), 10))
        for i in range(len(y)):
            y_vect[i, y[i]] = 1
        return y_vect

    # convert digits to vectors
    y_v_train = convert_y_to_vect(y_train)
    y_v_test = convert_y_to_vect(y_test)

    train_dataset = MNISTDataset(X_train, y_v_train)
    test_dataset = MNISTDataset(X_test, y_v_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    torch.manual_seed(0)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(0)
    mean, std = (0.5,), (0.5,)

    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    model = model.double()

    loss_train_seq = []
    loss_test_seq = []
    acc_train_seq = []
    acc_test_seq = []

    for i in range(num_epochs):
        loss_train = 0.0
        loss_test = 0.0
        acc_train = 0.0
        acc_test = 0.0
        train_examples = 0
        test_examples = 0

        for j, (images, labels) in enumerate(train_loader, 0):
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            acc_train += scorecard(output.detach().numpy(), labels.detach().numpy())
            train_examples += len(labels)
            loss_train += loss.detach().numpy()
            loss.backward()
            optimizer.step()


        for images, labels in test_loader:

            with torch.no_grad():
                output = model(images)

                loss = criterion(output, labels)
                loss_test += loss.detach().numpy()
                acc_test += scorecard(output.detach().numpy(), labels.detach().numpy())
                test_examples += len(labels)
        loss_train_seq.append(loss_train / train_examples)
        loss_test_seq.append(loss_test / test_examples)
        acc_train_seq.append(acc_train / train_examples)
        acc_test_seq.append(acc_test / test_examples)
        print('Epoch {} with test loss {} and test accuracy {}\n\n'.format(i, (loss_test / test_examples),(acc_test / test_examples) ))

    return (loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)


def plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq):
    print('The test set prediction accuracy is {}%'.format(acc_test_seq[-1] * 100))
    source = ColumnDataSource(data={
        'epoch'            : range(1, len(loss_test_seq) + 1),
        'train_loss'    : loss_train_seq,
        'test_loss'        :  loss_test_seq,
    })

    p = figure(title='MNIST Loss: Baseline No Skip Link', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_loss', color='#329fe3', line_alpha=0.8 , line_width=2, legend_label="Train", source=source)
    p.line(x='epoch', y='test_loss', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test", source=source)
    p.legend.location = "top_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Loss' 

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('Training Loss', '@train_loss{0.000 a}'),
                  ('Test Loss', '@test_loss{0.000 a}'),
        ],

        mode='mouse'
    ))

    show(p)

    source = ColumnDataSource(data={
        'epoch'            : range(1, len(acc_train_seq) + 1),
        'train_acc'    : np.array(acc_train_seq),
        'test_acc'        :  np.array(acc_test_seq),
    })

    p = figure(title='MNIST Accuracy: Baseline No Skip Link', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_acc', color='#329fe3', line_alpha=0.8 , line_width=2, legend_label="Train", source=source)
    p.line(x='epoch', y='test_acc', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test", source=source)
    p.legend.location = "bottom_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Accuracy' 
    p.yaxis.formatter = NumeralTickFormatter(format='0 %')

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('Training Accuracy', '@{train_acc}{%0.2f}'),
                  ('Test Accuracy', '@{test_acc}{%0.2f}'),
        ],

        mode='mouse'
    ))

    show(p)

batch_size = 1
learning_rate = 0.01
num_epochs = epochs

model = MNIST()

%time result_baseline_pytorch_mnist = trainModel(model, batch_size, num_epochs, learning_rate)
loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq = result_baseline_pytorch_mnist





===== HYPERPARAMETERS =====
batch_size= 1
epochs= 10
learning_rate= 0.01
Epoch 0 with test loss 0.11502919348221854 and test accuracy 0.19471488178025034


Epoch 1 with test loss 0.09715069488358223 and test accuracy 0.25869262865090403


Epoch 2 with test loss 0.09224775918156689 and test accuracy 0.3240611961057024


Epoch 3 with test loss 0.09021937660023159 and test accuracy 0.38247566063977745


Epoch 4 with test loss 0.08915112824313293 and test accuracy 0.43949930458970793


Epoch 5 with test loss 0.08848065398151346 and test accuracy 0.4659248956884562


Epoch 6 with test loss 0.08799633431971698 and test accuracy 0.4909596662030598


Epoch 7 with test loss 0.08760471187947753 and test accuracy 0.5006954102920723


Epoch 8 with test loss 0.08725952544770134 and test accuracy 0.5173852573018081


Epoch 9 with test loss 0.08693575525240692 and test accuracy 0.5285118219749653


CPU times: user 8.18 s, sys: 443 ms, total: 8.62 s
Wall time: 8.6 s


In [0]:
plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)

The test set prediction accuracy is 52.85118219749653%


## Part A ( ii ) Numpy Extension on Dataset I

In [0]:
# import numpy as np
import time
import numpy as np
from bokeh.plotting import figure, output_file, show, save
from bokeh.models import ColumnDataSource, HoverTool, NumeralTickFormatter
from bokeh.io import output_notebook
output_notebook()
import pickle


def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

class CNN:

    def __init__(self, image_size, num_kernels, size_kernels, padding_kernel, pool_size, out_nodes, seed):
        self.kernels = self.Kernels(num_kernels, size_kernels, padding_kernel)  # 8x8x1  to 6x6x10
        self.pool = self.Pool(pool_size)  # 6x6x10 to 3x3x10
        self.relu = self.ReLu(image_size*image_size+image_size//pool_size * image_size//pool_size * num_kernels, out_nodes)  # 6x6x10 to 10
        np.random.seed(seed)
        self.image_size = image_size
        self.num_kernels = num_kernels
        self.size_kernels = size_kernels
        self.padding_kernel = padding_kernel
        self.pool_size = pool_size
        self.out_nodes = out_nodes

    class Kernels:
        # The set of kernels and kernel functions

        def __init__(self, num_kernels, size_kernel, padding):
            self.num_kernels = num_kernels
            self.size_kernel = size_kernel
            self.padding = padding
            self.kernels = np.random.randn(num_kernels, self.size_kernel, self.size_kernel) / (self.size_kernel ** 2)
            self.cur_x = np.zeros((1))
            self.cur_x_pad = np.zeros((1))

        def forward(self, x):
            self.cur_x = x

            h_in, w_in = x.shape
            h_out = h_in - 2 + (self.padding*2)
            w_out = w_in - 2 + (self.padding*2)
            result = np.zeros((h_out, w_out, self.num_kernels))
            h_out, w_out, _ = result.shape

            # append 0 padding
            self.cur_x_pad = np.concatenate((np.zeros((h_in,1)), self.cur_x, np.zeros((h_in,1))), axis=1)
            self.cur_x_pad = np.concatenate((np.zeros((1,w_in+(self.padding*2))), self.cur_x_pad, np.zeros((1,w_in+(self.padding*2)))), axis=0)
            for i in range(h_out):
                for j in range(w_out):
                    block = self.cur_x_pad[i:(i+self.size_kernel), j:(j+self.size_kernel)]
                    result[i,j] = np.sum(block * self.kernels, axis=(1, 2))
            return result

        def back(self, delta_cnn, alpha):
            height, width = self.cur_x.shape
            dk = np.zeros(self.kernels.shape)
            delta_cnn = delta_cnn.reshape(height, width,self.num_kernels)
            for i in range(height):
                for j in range(width):
                    block_x = self.cur_x_pad[i:(i+self.size_kernel), j:(j+self.size_kernel)]
                    # block_delta = delta_cnn[i:(i+3), j:(j+3)]
                    for k in range(self.num_kernels):
                        dk[k] += delta_cnn[i,j,k] * block_x

            self.kernels += -alpha * dk


    class Pool:
        # use max pooling with size 2x2
        def __init__(self, pool_size):
            self.dim = pool_size  # size of the pool
            self.cur_pool_input = np.zeros((1))

        def forward(self, x):
            self.cur_pool_input = x
            height, width, num_k = x.shape
            result = np.zeros((height // self.dim, width // self.dim, num_k))
            for i in range(height // self.dim):
                for j in range(width // self.dim):
                    block = x[(i*self.dim):(i*self.dim+self.dim), (j*self.dim):(j*self.dim+self.dim)]
                    result[i,j] = np.amax(block, axis=(0,1))
            return result

        def back(self, delta_pool):
            height, width, num_k = self.cur_pool_input.shape
            delta_pool = delta_pool.reshape((height//2, width//2, num_k))
            delta_cnn = np.zeros((height, width, num_k))

            for i in range(height // self.dim):
                for j in range(width // self.dim):
                    block = self.cur_pool_input[(i*self.dim):(i*self.dim+self.dim),(j*self.dim):(j*self.dim+self.dim)]
                    b_h, b_w, b_k = block.shape
                    maximum = np.amax(block, axis=(0,1))
                    for s in range(b_h):
                        for t in range(b_w):
                            for u in range(b_k):
                                if block[s, t, u] == maximum[u]:
                                    delta_cnn[i*2+s, j*2+t, u] = delta_pool[i,j,u]
            return delta_cnn


    class ReLu:
        # Use a fully connected layer using sigmoid activation

        def __init__(self, in_nodes, out_nodes):
            self.w = np.random.randn(out_nodes, in_nodes) / in_nodes
            self.b = np.zeros((out_nodes, 1))
            self.y_vector = np.zeros((1))
            self.z_cnn = np.zeros((1))
            self.a_cnn = np.zeros((1))
            self.z_out = np.zeros((1))
            self.a_out = np.zeros((1))
            self.image_size = 0


        def f(self, z):
            return np.where(z > 0, z, z * 0.2)

        def d_f(self, z):
            test = np.where(z>0, 1.0, 0.2)
            return np.where(z>0, 1.0, 0.2)

        def loss(self, y, yhat):
            self.y_vector = np.zeros((len(yhat), 1))
            self.y_vector[y, 0] = 1.0
            loss = np.linalg.norm(self.y_vector - yhat)
            accuracy = 1 if np.argmax(yhat[:, 0]) == y else 0
            return loss, accuracy

        def forward(self, image, x):
            # TODO setup to receive pool size
            h, w = image.shape
            self.image_size = h * w
            self.z_cnn = np.concatenate((image.reshape(-1, 1),  x.reshape(-1, 1)), axis=0)
            self.a_cnn = self.f(self.z_cnn)

            self.z_out = np.matmul(self.w, self.a_cnn) + self.b
            self.a_out = self.f(self.z_out)

            return self.a_out

        def back(self, alpha):

            w_grad = np.zeros(self.w.shape)
            b_grad = np.zeros(self.b.shape)

            # get delta for out layer
            delta_out = -(self.y_vector - self.a_out) * self.d_f(self.z_out)

            # get delta for hidden layer
            delta_pool = np.dot(self.w.T, delta_out) * self.d_f(self.z_cnn)

            w_grad += -alpha * np.matmul(delta_out, self.a_cnn.T)
            b_grad += -alpha * delta_out

            return delta_pool[self.image_size:]



def get_data(in_data):
    X = in_data['data']
    y = in_data['target']

    class Scaler():
        def __init__(self, training_data):
            self.u = np.mean(training_data)
            self.s = np.std(training_data)

        def scale_data(self, in_data):
            return (in_data - self.u) / self.s

    X_scale = Scaler(X)
    X = X_scale.scale_data(X)

    def split(in_X, in_y, percentage_test):
        data_size = len(in_y)
        order = np.arange(0, data_size, dtype=int)
        np.random.shuffle(order)
        out_X = in_X[order]
        out_y = np.array(in_y)[order].tolist()
        split = int(np.rint(data_size * percentage_test))
        return in_X[split:], in_X[0:split], in_y[split:], in_y[0:split]

    # Split the data into training and test set.  60% training and %40 test
    X_train, X_test, y_train, y_test = split(X, y, 0.4)

    # Reshape X_train and X_test to be (-1, 8, 8)
    X_train = X_train.reshape((-1, 8, 8))
    X_test = X_test.reshape((-1, 8, 8))
    return (X_train, X_test, y_train, y_test)

def shuffle(X, y):
    new_order = np.random.permutation(len(y))
    X = X[new_order]
    y = y[new_order]
    return X, y

def forward(x, y, model):
    result = model.kernels.forward(x)
    result = model.pool.forward(result)
    y_hat = model.relu.forward(x, result)
    loss, accuracy = model.relu.loss(y, y_hat)
    return y_hat, loss, accuracy


def train(x, y, model, alpha=0.1):
    # forward
    _, loss, accuracy = forward(x, y, model)

    # back
    delta = model.relu.back(alpha)
    delta = model.pool.back(delta)
    model.kernels.back(delta, alpha)

    return loss, accuracy


def plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq):
    print('The test set prediction accuracy is {}%'.format(acc_test_seq[-1] * 100))
    source = ColumnDataSource(data={
        'epoch'            : range(1, len(loss_test_seq) + 1),
        'train_loss'    : loss_train_seq,
        'test_loss'        :  loss_test_seq,
    })

    p = figure(title='MNIST Loss: NumPy Skip Link', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_loss', color='#329fe3', line_alpha=0.8 , line_width=2, legend_label="Train", source=source)
    p.line(x='epoch', y='test_loss', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test", source=source)
    p.legend.location = "top_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Loss'

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('Training Loss', '@train_loss{0.000 a}'),
                  ('Test Loss', '@test_loss{0.000 a}'),
        ],

        mode='mouse'
    ))

    show(p)

    source = ColumnDataSource(data={
        'epoch'            : range(1, len(acc_train_seq) + 1),
        'train_acc'    : np.array(acc_train_seq),
        'test_acc'        :  np.array(acc_test_seq),
    })

    p = figure(title='MNIST Accuracy: NumPy Skip Link', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_acc', color='#329fe3', line_alpha=0.8 , line_width=2, legend_label="Train", source=source)
    p.line(x='epoch', y='test_acc', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test", source=source)
    p.legend.location = "bottom_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Accuracy'
    p.yaxis.formatter = NumeralTickFormatter(format='0 %')

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('Training Accuracy', '@{train_acc}{%0.2f}'),
                  ('Test Accuracy', '@{test_acc}{%0.2f}'),
        ],

        mode='mouse'
    ))

    show(p)



def train_model(in_data, epochs):
    image_size = 8  # Images are 8 x 8
    kernel_layers = 64
    kernel_size = 3
    kernal_padding = 1
    pool_size = 2

    out_nodes = 10
    alpha = 0.4
    seed = 33

    X_train, X_test, y_train, y_test = get_data(in_data)

    model = CNN(image_size, kernel_layers, kernel_size, kernal_padding, pool_size, out_nodes, seed)


    loss_train_seq = []
    loss_test_seq = []
    acc_train_seq = []
    acc_test_seq = []
    train_examples = len(y_train)
    test_examples = len(y_test)

    print('=' * 30)
    print('==== Hyperparameters ====')
    print('='*30)
    print('Learning Rate: {}'.format(alpha))
    print('Kernel Layers: {}'.format(kernel_layers))
    # print('Pool Size: {}'.format(pool_size))
    print('Epochs: {}'.format(epochs))
    print('Seed: {}'.format(seed))
    print('=' * 30 + '\n')
    for epoch in range(epochs):
        print('=' * 30)
        print('Starting epoch {} of {}'.format(epoch, epochs))
        X_train, y_train = shuffle(X_train, y_train)

        loss_train = 0.0
        loss_test = 0.0
        acc_train = 0.0
        acc_test = 0.0

        for i, (image, label) in enumerate(zip(X_train, y_train)):
            loss, acc = train(image, label, model, alpha)
            loss_train += loss
            acc_train += acc


        for i, (image, label) in enumerate(zip(X_test, y_test)):
            _, loss, acc = forward(image, label, model)
            loss_test += loss
            acc_test += acc

        loss_train_seq.append(loss_train / train_examples)
        loss_test_seq.append(loss_test / test_examples)
        acc_train_seq.append(acc_train / train_examples)
        acc_test_seq.append(acc_test / test_examples)

        print('Test Loss: {}\nTest Accuracy: {} %'.format(loss_test_seq[-1], acc_test_seq[-1] * 100))
        print('Train Loss: {}\nTrain Accuracy: {} %\n\n'.format(loss_train_seq[-1], acc_train_seq[-1] * 100))

    return (loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)


# % time
% time results_skip_numpy_mnist = train_model(digits, epochs)
loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq = results_skip_numpy_mnist




==== Hyperparameters ====
Learning Rate: 0.4
Kernel Layers: 64
Epochs: 10
Seed: 33

Starting epoch 0 of 10
Test Loss: 0.9273836531064986
Test Accuracy: 53.546592489568845 %
Train Loss: 0.9576612936593961
Train Accuracy: 41.65120593692022 %


Starting epoch 1 of 10
Test Loss: 0.8896873975011635
Test Accuracy: 67.59388038942976 %
Train Loss: 0.9066198808685438
Train Accuracy: 68.55287569573284 %


Starting epoch 2 of 10
Test Loss: 0.8618548344827912
Test Accuracy: 73.15716272600834 %
Train Loss: 0.8729858704718679
Train Accuracy: 75.23191094619665 %


Starting epoch 3 of 10
Test Loss: 0.8384463166943574
Test Accuracy: 76.49513212795549 %
Train Loss: 0.845792519357839
Train Accuracy: 80.0556586270872 %


Starting epoch 4 of 10
Test Loss: 0.8180884416598465
Test Accuracy: 77.74687065368568 %
Train Loss: 0.8220358298443555
Train Accuracy: 82.93135435992579 %


Starting epoch 5 of 10
Test Loss: 0.7999456490177841
Test Accuracy: 79.55493741307372 %
Train Loss: 0.80106586501483
Train Accuracy:

In [0]:
plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)

The test set prediction accuracy is 81.08484005563282%


## Part B ( i ) Baseline on Dataset II

In [0]:
# Import Libraries
from sklearn.metrics import accuracy_score #
import numpy as np
import numpy.random as r # We will randomly initialize our weights
import matplotlib.pyplot as plt
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, HoverTool, NumeralTickFormatter
from bokeh.io import output_notebook
output_notebook()
import pickle


# Import modules
import torch
import pickle
import numpy as np
import torch.nn.functional as F
from torch import nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.models.formatters import NumeralTickFormatter
from bokeh.io import output_notebook
output_notebook()

# TODO Load Dataset
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

class MNISTDataset(Dataset):
    def __init__(self, data, label):
        self.data = data.reshape((-1, 32, 32, 3))
        self.label = label

    def __len__(self):
        return len(self.label)

    def __getitem__(self, item):
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        image = self.data[item].transpose((2, 0, 1))
        image = torch.from_numpy(image)
        target = self.label[item]
        target = torch.from_numpy(target)
        return (image, target)


class MNIST(nn.Module):

    # Our batch shape for input x is (1, 8, 8)

    def __init__(self):
        super(MNIST, self).__init__()

        self.fc1 = nn.Linear(3072, 30)
        self.fc2 = nn.Linear(30, 10)

    def forward(self, x):

        x = x.view(-1, 32 * 32 * 3)

        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))

        return x


def scorecard(output, labels):
    score = 0
    score = np.where(np.argmax(output, axis=1) == np.argmax(labels, axis=1), 1, 0)

    score = np.sum(score)
    return score

def trainModel(model, batch_size, num_epochs, learning_rate):
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", num_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)

    X = cifar[b'data']
    y = cifar[b'labels']

    class Scaler():
      def __init__(self, training_data):
        self.u = np.mean(training_data)
        self.s = np.std(training_data)

      def scale_data(self, in_data):
        return (in_data - self.u) / self.s

    X_scale = Scaler(X)
    X = X_scale.scale_data(X)

    def split(in_X, in_y, percentage_test):
        data_size = len(in_y)
        order = np.arange(0, data_size, dtype=int)
        np.random.shuffle(order)
        out_X = in_X[order]
        out_y = np.array(in_y)[order].tolist()
        split = int(np.rint(data_size * percentage_test))
        return in_X[split:], in_X[0:split], in_y[split:], in_y[0:split]

    # Split the data into training and test set.  60% training and %40 test
    X_train, X_test, y_train, y_test = split(X, y, 0.4)

    def convert_y_to_vect(y):
        y_vect = np.zeros((len(y), 10))
        for i in range(len(y)):
            y_vect[i, y[i]] = 1
        return y_vect

    # convert digits to vectors
    y_v_train = convert_y_to_vect(y_train)
    y_v_test = convert_y_to_vect(y_test)

    train_dataset = MNISTDataset(X_train, y_v_train)
    test_dataset = MNISTDataset(X_test, y_v_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    torch.manual_seed(0)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(0)
    mean, std = (0.5,), (0.5,)

    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    model = model.double()

    loss_train_seq = []
    loss_test_seq = []
    acc_train_seq = []
    acc_test_seq = []

    for i in range(num_epochs):
        loss_train = 0.0
        loss_test = 0.0
        acc_train = 0.0
        acc_test = 0.0
        train_examples = 0
        test_examples = 0

        for j, (images, labels) in enumerate(train_loader, 0):
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            acc_train += scorecard(output.detach().numpy(), labels.detach().numpy())
            train_examples += len(labels)
            loss_train += loss.detach().numpy()
            loss.backward()
            optimizer.step()

        for images, labels in test_loader:
            with torch.no_grad():
                output = model(images)

                loss = criterion(output, labels)
                loss_test += loss.detach().numpy()
                acc_test += scorecard(output.detach().numpy(), labels.detach().numpy())
                test_examples += len(labels)
        loss_train_seq.append(loss_train / train_examples)
        loss_test_seq.append(loss_test / test_examples)
        acc_train_seq.append(acc_train / train_examples)
        acc_test_seq.append(acc_test / test_examples)
        print('Epoch {} with test loss {} and test accuracy {}\n\n'.format(i, (loss_test / test_examples),
                                                                            (acc_test / test_examples)))

    return (loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)


def plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq):
    print('The test set prediction accuracy is {}%'.format(acc_test_seq[-1] * 100))
    # import pdb; pdb.set_trace()
    source = ColumnDataSource(data={
        'epoch': range(1, len(loss_test_seq) + 1),
        'train_loss': loss_train_seq,
        'test_loss': loss_test_seq,
    })

    p = figure(title='CIFAR Loss: Baseline No Skip Link', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_loss', color='#329fe3', line_alpha=0.8, line_width=2, legend_label="Train",
            source=source)
    p.line(x='epoch', y='test_loss', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test",
            source=source)
    p.legend.location = "top_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Loss'

    p.add_tools(HoverTool(
        tooltips=[
            ('Epochs', '@epoch{int}'),
            ('Training Loss', '@train_loss{0.000 a}'),
            ('Test Loss', '@test_loss{0.000 a}'),
        ],

        mode='mouse'
    ))

    show(p)

    source = ColumnDataSource(data={
        'epoch': range(1, len(acc_train_seq) + 1),
        'train_acc': np.array(acc_train_seq),
        'test_acc': np.array(acc_test_seq),
    })

    p = figure(title='CIFAR Accuracy: Baseline No Skip Link', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_acc', color='#329fe3', line_alpha=0.8, line_width=2, legend_label="Train",
            source=source)
    p.line(x='epoch', y='test_acc', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test",
            source=source)
    p.legend.location = "bottom_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Accuracy'
    p.yaxis.formatter = NumeralTickFormatter(format='0 %')

    p.add_tools(HoverTool(
        tooltips=[
            ('Epochs', '@epoch{int}'),
            ('Training Accuracy', '@{train_acc}{%0.2f}'),
            ('Test Accuracy', '@{test_acc}{%0.2f}'),
        ],

        mode='mouse'
    ))

    show(p)


batch_size = 1
learning_rate = 0.01
num_epochs = epochs

model = MNIST()

# % time
result_baseline_pytorch_cifar = trainModel(model, batch_size, num_epochs, learning_rate)
loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq = result_baseline_pytorch_cifar





===== HYPERPARAMETERS =====
batch_size= 1
epochs= 10
learning_rate= 0.01
Epoch 0 with test loss 0.13420532843709576 and test accuracy 0.21


Epoch 1 with test loss 0.10685104106848753 and test accuracy 0.2525


Epoch 2 with test loss 0.098001936511202 and test accuracy 0.2675


Epoch 3 with test loss 0.09402676346568203 and test accuracy 0.255


Epoch 4 with test loss 0.09185360652123706 and test accuracy 0.25


Epoch 5 with test loss 0.09050148834722976 and test accuracy 0.2525


Epoch 6 with test loss 0.08957788263412651 and test accuracy 0.25


Epoch 7 with test loss 0.0889003761299006 and test accuracy 0.2525


Epoch 8 with test loss 0.08837471453487866 and test accuracy 0.255


Epoch 9 with test loss 0.0879481642834481 and test accuracy 0.255




In [0]:
plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)

The test set prediction accuracy is 25.5%


## Part B ( ii ) Numpy Extension on Dataset II

In [0]:
# import numpy as np
import time
import numpy as np
from bokeh.plotting import figure, output_file, show, save
from bokeh.models import ColumnDataSource, HoverTool, NumeralTickFormatter
from bokeh.io import output_notebook
output_notebook()
import pickle


def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

class CNN:

    def __init__(self, image_size, num_kernels, size_kernels, padding_kernel, pool_size, out_nodes, seed):
        self.kernels = self.Kernels(num_kernels, size_kernels, padding_kernel)  # 8x8x1  to 6x6x10
        self.pool = self.Pool(pool_size)  # 6x6x10 to 3x3x10
        self.relu = self.ReLu(image_size*image_size+image_size//pool_size * image_size//pool_size * num_kernels, out_nodes)  # 6x6x10 to 10
        np.random.seed(seed)
        self.image_size = image_size
        self.num_kernels = num_kernels
        self.size_kernels = size_kernels
        self.padding_kernel = padding_kernel
        self.pool_size = pool_size
        self.out_nodes = out_nodes

    class Kernels:
        # The set of kernels and kernel functions

        def __init__(self, num_kernels, size_kernel, padding):
            self.num_kernels = num_kernels
            self.size_kernel = size_kernel
            self.padding = padding
            self.kernels = np.random.randn(num_kernels, self.size_kernel, self.size_kernel) / (self.size_kernel ** 2)
            self.cur_x = np.zeros((1))
            self.cur_x_pad = np.zeros((1))

        def forward(self, x):
            self.cur_x = x

            h_in, w_in = x.shape
            h_out = h_in - 2 + (self.padding*2)
            w_out = w_in - 2 + (self.padding*2)
            result = np.zeros((h_out, w_out, self.num_kernels))
            h_out, w_out, _ = result.shape

            # append 0 padding
            self.cur_x_pad = np.concatenate((np.zeros((h_in,1)), self.cur_x, np.zeros((h_in,1))), axis=1)
            self.cur_x_pad = np.concatenate((np.zeros((1,w_in+(self.padding*2))), self.cur_x_pad, np.zeros((1,w_in+(self.padding*2)))), axis=0)
            for i in range(h_out):
                for j in range(w_out):
                    block = self.cur_x_pad[i:(i+self.size_kernel), j:(j+self.size_kernel)]
                    result[i,j] = np.sum(block * self.kernels, axis=(1, 2))
            return result

        def back(self, delta_cnn, alpha):
            height, width = self.cur_x.shape
            dk = np.zeros(self.kernels.shape)
            delta_cnn = delta_cnn.reshape(height, width,self.num_kernels)
            for i in range(height):
                for j in range(width):
                    block_x = self.cur_x_pad[i:(i+self.size_kernel), j:(j+self.size_kernel)]
                    # block_delta = delta_cnn[i:(i+3), j:(j+3)]
                    for k in range(self.num_kernels):
                        dk[k] += delta_cnn[i,j,k] * block_x

            self.kernels += -alpha * dk


    class Pool:
        # use max pooling with size 2x2
        def __init__(self, pool_size):
            self.dim = pool_size  # size of the pool
            self.cur_pool_input = np.zeros((1))

        def forward(self, x):
            self.cur_pool_input = x
            height, width, num_k = x.shape
            result = np.zeros((height // self.dim, width // self.dim, num_k))
            for i in range(height // self.dim):
                for j in range(width // self.dim):
                    block = x[(i*self.dim):(i*self.dim+self.dim), (j*self.dim):(j*self.dim+self.dim)]
                    result[i,j] = np.amax(block, axis=(0,1))
            return result

        def back(self, delta_pool):
            height, width, num_k = self.cur_pool_input.shape
            delta_pool = delta_pool.reshape((height//2, width//2, num_k))
            delta_cnn = np.zeros((height, width, num_k))

            for i in range(height // self.dim):
                for j in range(width // self.dim):
                    block = self.cur_pool_input[(i*self.dim):(i*self.dim+self.dim),(j*self.dim):(j*self.dim+self.dim)]
                    b_h, b_w, b_k = block.shape
                    maximum = np.amax(block, axis=(0,1))
                    for s in range(b_h):
                        for t in range(b_w):
                            for u in range(b_k):
                                if block[s, t, u] == maximum[u]:
                                    delta_cnn[i*2+s, j*2+t, u] = delta_pool[i,j,u]
            return delta_cnn


    class ReLu:
        # Use a fully connected layer using sigmoid activation

        def __init__(self, in_nodes, out_nodes):
            self.w = np.random.randn(out_nodes, in_nodes) / in_nodes
            self.b = np.zeros((out_nodes, 1))
            self.y_vector = np.zeros((1))
            self.z_cnn = np.zeros((1))
            self.a_cnn = np.zeros((1))
            self.z_out = np.zeros((1))
            self.a_out = np.zeros((1))
            self.image_size = 0


        def f(self, z):
            return np.where(z > 0, z, z * 0.2)

        def d_f(self, z):
            test = np.where(z>0, 1.0, 0.2)
            return np.where(z>0, 1.0, 0.2)

        def loss(self, y, yhat):
            self.y_vector = np.zeros((len(yhat), 1))
            self.y_vector[y, 0] = 1.0
            loss = np.linalg.norm(self.y_vector - yhat)
            accuracy = 1 if np.argmax(yhat[:, 0]) == y else 0
            return loss, accuracy

        def forward(self, image, x):
            # TODO setup to receive pool size
            h, w = image.shape
            self.image_size = h * w
            self.z_cnn = np.concatenate((image.reshape(-1, 1),  x.reshape(-1, 1)), axis=0)
            self.a_cnn = self.f(self.z_cnn)

            self.z_out = np.matmul(self.w, self.a_cnn) + self.b
            self.a_out = self.f(self.z_out)

            return self.a_out

        def back(self, alpha):

            w_grad = np.zeros(self.w.shape)
            b_grad = np.zeros(self.b.shape)

            # get delta for out layer
            delta_out = -(self.y_vector - self.a_out) * self.d_f(self.z_out)

            # get delta for hidden layer
            delta_pool = np.dot(self.w.T, delta_out) * self.d_f(self.z_cnn)

            w_grad += -alpha * np.matmul(delta_out, self.a_cnn.T)
            b_grad += -alpha * delta_out

            return delta_pool[self.image_size:]



def get_data(in_data):
    X = in_data[b'data']
    y = in_data[b'labels']

    X = X.reshape((-1, 32, 32, 3))
    X = np.sum(X,axis=3)
    X = X.reshape((-1, 1024))

    class Scaler():
        def __init__(self, training_data):
            self.u = np.mean(training_data)
            self.s = np.std(training_data)

        def scale_data(self, in_data):
            return (in_data - self.u) / self.s

    X_scale = Scaler(X)
    X = X_scale.scale_data(X)

    def split(in_X, in_y, percentage_test):
        data_size = len(in_y)
        order = np.arange(0, data_size, dtype=int)
        np.random.shuffle(order)
        out_X = in_X[order]
        out_y = np.array(in_y)[order].tolist()
        split = int(np.rint(data_size * percentage_test))
        return in_X[split:], in_X[0:split], in_y[split:], in_y[0:split]

    # Split the data into training and test set.  60% training and %40 test
    X_train, X_test, y_train, y_test = split(X, y, 0.4)

    # Reshape X_train and X_test to be (-1, 8, 8)
    X_train = X_train.reshape((-1, 32, 32))
    X_test = X_test.reshape((-1, 32, 32))
    return (X_train, X_test, y_train, y_test)

def shuffle(X, y):
    data_size = len(y)
    order = np.arange(0, data_size, dtype=int)
    np.random.shuffle(order)
    return X[order], np.array(y)[order].tolist()

def forward(x, y, model):
    result = model.kernels.forward(x)
    result = model.pool.forward(result)
    y_hat = model.relu.forward(x, result)
    loss, accuracy = model.relu.loss(y, y_hat)
    return y_hat, loss, accuracy


def train(x, y, model, alpha=0.1):
    # forward
    _, loss, accuracy = forward(x, y, model)

    # back
    delta = model.relu.back(alpha)
    delta = model.pool.back(delta)
    model.kernels.back(delta, alpha)

    return loss, accuracy


def plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq):
    print('The test set prediction accuracy is {}%'.format(acc_test_seq[-1] * 100))
    source = ColumnDataSource(data={
        'epoch'            : range(1, len(loss_test_seq) + 1),
        'train_loss'    : loss_train_seq,
        'test_loss'        :  loss_test_seq,
    })

    p = figure(title='CIFAR Loss: NumPy Skip Link', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_loss', color='#329fe3', line_alpha=0.8 , line_width=2, legend_label="Train", source=source)
    p.line(x='epoch', y='test_loss', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test", source=source)
    p.legend.location = "top_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Loss'

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('Training Loss', '@train_loss{0.000 a}'),
                  ('Test Loss', '@test_loss{0.000 a}'),
        ],

        mode='mouse'
    ))

    show(p)

    source = ColumnDataSource(data={
        'epoch'            : range(1, len(acc_train_seq) + 1),
        'train_acc'    : np.array(acc_train_seq),
        'test_acc'        :  np.array(acc_test_seq),
    })

    p = figure(title='CIFAR Accuracy: NumPy Skip Link', plot_width=400, plot_height=400)

    p.line(x='epoch', y='train_acc', color='#329fe3', line_alpha=0.8 , line_width=2, legend_label="Train", source=source)
    p.line(x='epoch', y='test_acc', color='#e33270', line_alpha=0.8, line_width=2, legend_label="Test", source=source)
    p.legend.location = "bottom_right"
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Accuracy'
    p.yaxis.formatter = NumeralTickFormatter(format='0 %')

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('Training Accuracy', '@{train_acc}{%0.2f}'),
                  ('Test Accuracy', '@{test_acc}{%0.2f}'),
        ],

        mode='mouse'
    ))

    show(p)



def train_model(in_data, epochs):
    image_size = 32  # Images are 8 x 8
    kernel_layers = 32
    kernel_size = 3
    kernal_padding = 1
    pool_size = 2

    out_nodes = 10
    alpha = 0.4
    seed = 0

    X_train, X_test, y_train, y_test = get_data(in_data)

    model = CNN(image_size, kernel_layers, kernel_size, kernal_padding, pool_size, out_nodes, seed)


    loss_train_seq = []
    loss_test_seq = []
    acc_train_seq = []
    acc_test_seq = []
    train_examples = len(y_train)
    test_examples = len(y_test)

    print('=' * 30)
    print('==== Hyperparameters ====')
    print('='*30)
    print('Learning Rate: {}'.format(alpha))
    print('Kernel Layers: {}'.format(kernel_layers))
    # print('Pool Size: {}'.format(pool_size))
    print('Epochs: {}'.format(epochs))
    print('Seed: {}'.format(seed))
    print('=' * 30 + '\n')
    for epoch in range(epochs):
        print('=' * 30)
        print('Starting epoch {} of {}'.format(epoch, epochs))
        X_train, y_train = shuffle(X_train, y_train)

        loss_train = 0.0
        loss_test = 0.0
        acc_train = 0.0
        acc_test = 0.0

        for i, (image, label) in enumerate(zip(X_train, y_train)):
            loss, acc = train(image, label, model, alpha)
            loss_train += loss
            acc_train += acc


        for i, (image, label) in enumerate(zip(X_test, y_test)):
            _, loss, acc = forward(image, label, model)
            loss_test += loss
            acc_test += acc

        loss_train_seq.append(loss_train / train_examples)
        loss_test_seq.append(loss_test / test_examples)
        acc_train_seq.append(acc_train / train_examples)
        acc_test_seq.append(acc_test / test_examples)

        print('Test Loss: {}\nTest Accuracy: {} %'.format(loss_test_seq[-1], acc_test_seq[-1] * 100))
        print('Train Loss: {}\nTrain Accuracy: {} %\n\n'.format(loss_train_seq[-1], acc_train_seq[-1] * 100))

    return (loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)


# % time
%time results_skip_numpy_cifar_two = train_model(cifar, epochs)
loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq = results_skip_numpy_cifar_two




==== Hyperparameters ====
Learning Rate: 0.4
Kernel Layers: 32
Epochs: 10
Seed: 0

Starting epoch 0 of 10
Test Loss: 0.996197982025551
Test Accuracy: 15.25 %
Train Loss: 0.9975671803026311
Train Accuracy: 12.0 %


Starting epoch 1 of 10
Test Loss: 0.993080422236026
Test Accuracy: 14.75 %
Train Loss: 0.9947202774451455
Train Accuracy: 14.666666666666666 %


Starting epoch 2 of 10
Test Loss: 0.9897850816080676
Test Accuracy: 14.75 %
Train Loss: 0.9912954709173545
Train Accuracy: 15.666666666666668 %


Starting epoch 3 of 10
Test Loss: 0.986376499395907
Test Accuracy: 15.25 %
Train Loss: 0.9876504407535477
Train Accuracy: 17.5 %


Starting epoch 4 of 10
Test Loss: 0.9831298171120073
Test Accuracy: 15.75 %
Train Loss: 0.983968059226083
Train Accuracy: 17.5 %


Starting epoch 5 of 10
Test Loss: 0.9801204015350126
Test Accuracy: 16.0 %
Train Loss: 0.9804562359235818
Train Accuracy: 18.5 %


Starting epoch 6 of 10
Test Loss: 0.9773787220506124
Test Accuracy: 16.25 %
Train Loss: 0.977224851305

In [0]:
loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq = results_skip_numpy_cifar_two
plot_results(loss_train_seq, loss_test_seq, acc_train_seq, acc_test_seq)

The test set prediction accuracy is 15.75%


## Summary of Results

In [0]:
def plot_results_compare_set_one(baseline, new_benchmark):
    
    loss_train_base, loss_test_base, acc_train_base, acc_test_base = baseline
    loss_train_new, loss_test_new, acc_train_new, acc_test_new = new_benchmark

    source = ColumnDataSource(data={
        'epoch'            : range(1, len(acc_test_base) + 1),
        'test_acc_base'        :  np.array(acc_test_base),
        'test_acc_new'        :  np.array(acc_test_new),
    })

    p = figure(title='MNIST Accuracy: With and Without Skip Link', plot_width=600, plot_height=400)

    l_van = p.line(x='epoch', y='test_acc_base', color='#329fe3', line_alpha=0.8 , line_width=2, source=source)
    l_torch = p.line(x='epoch', y='test_acc_new', color='#e33270', line_alpha=0.8, line_width=2, source=source)
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Accuracy'
    p.yaxis.formatter = NumeralTickFormatter(format='0 %')
    

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('No Skip Link', '@{test_acc_base}{%0.2f}'),
                  ('NumPy Skip Link', '@{test_acc_new}{%0.2f}'),
        ],

        mode='mouse'
    ))

    legend = Legend(items=[('No Skip Link', [l_van]), ('NumPy Skip Link', [l_torch]),], location=(0,250))
    p.add_layout(legend, 'left')

    show(p)




In [0]:
def plot_results_compare_set_two(baseline, new_benchmark):
    
    loss_train_base, loss_test_base, acc_train_base, acc_test_base = baseline
    loss_train_new, loss_test_new, acc_train_new, acc_test_new = new_benchmark

    source = ColumnDataSource(data={
        'epoch'            : range(1, len(acc_test_base) + 1),
        'test_acc_base'        :  np.array(acc_test_base),
        'test_acc_new'        :  np.array(acc_test_new),
    })

    p = figure(title='CIFAR Accuracy: With and Without Skip Link', plot_width=600, plot_height=400)

    l_van = p.line(x='epoch', y='test_acc_base', color='#329fe3', line_alpha=0.8 , line_width=2, source=source)
    l_torch = p.line(x='epoch', y='test_acc_new', color='#e33270', line_alpha=0.8, line_width=2, source=source)
    p.xaxis.axis_label = 'Epochs'
    p.yaxis.axis_label = 'Accuracy'
    p.yaxis.formatter = NumeralTickFormatter(format='0 %')
    

    p.add_tools(HoverTool(
        tooltips=[
                  ('Epochs', '@epoch{int}'),
                  ('No Skip Link', '@{test_acc_base}{%0.2f}'),
                  ('NumPy Skip Link', '@{test_acc_new}{%0.2f}'),
        ],

        mode='mouse'
    ))

    legend = Legend(items=[('No Skip Link', [l_van]), ('NumPy Skip Link', [l_torch]),], location=(0,250))
    p.add_layout(legend, 'left')

    show(p)

## Synopsis: Compare Part ( i ) and ( ii ) on Dataset I

In [0]:
plot_results_compare_set_one(result_baseline_pytorch_mnist, results_skip_numpy_mnist)

## Synopsis: Compare Part ( iii ) and ( iv ) on Dataset II

In [0]:
plot_results_compare_set_two(result_baseline_pytorch_cifar, results_skip_numpy_cifar_two)