In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Acquiring / Loading data (from previous problem)

**Note:** TensorFlow is used solely for loading the dataset (`tf.keras.datasets`), and scikit-learn is used solely for splitting the data into training and validation sets. The actual deep learning implementation is done using NumPy and linear algebra.

In [2]:
import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split

In [17]:
# Load CIFAR-10 through the Keras dataset helper. This binds the four notebook-level
# arrays (train/test images and labels) that every later cell reads and reassigns.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [18]:
def shapes():
    '''Print the shapes of the notebook-level train / validation / test arrays.

    Reads the globals x_train, y_train, x_val, y_val, x_test and y_test, so all
    six must already be defined when this is called.
    '''
    # One two-line report per split; the splits are separated by a blank line.
    split_reports = [
        f'X Train set: {np.shape(x_train)}\nY Train set: {np.shape(y_train)}',
        f'X Val set: {np.shape(x_val)}\nY val set: {np.shape(y_val)}',
        f'X Test set: {np.shape(x_test)}\nY Test set: {np.shape(y_test)}',
    ]
    print('\n\n'.join(split_reports))

# Picking random 3 classes

random_classes = [0,1,5]
def sample_subset(X, y, random_classes=random_classes):
    '''Keep only the samples whose label is one of `random_classes`, then shuffle.

    Parameters
    ----------
    X : array of samples, indexed along axis 0.
    y : array of labels aligned with X along axis 0.
    random_classes : sequence of exactly three label values to keep.

    Returns
    -------
    (X, y) : the selected samples and labels, shuffled with one shared
        permutation so that they stay aligned.

    The samples are gathered class by class (in the order of `random_classes`)
    before shuffling. The shuffle uses NumPy's global random state.
    '''
    assert len(random_classes) == 3, 'Number of random classes must be three for this situation'

    kept_x, kept_y = [], []
    for label in random_classes:
        # Row indices of every sample carrying this label.
        rows = np.where(y == label)[0]
        kept_x.append(np.take(X, rows, axis=0))
        kept_y.append(np.take(y, rows, axis=0))

    X = np.concatenate(kept_x)
    y = np.concatenate(kept_y)

    # Shuffle both arrays with the same permutation to keep samples and labels paired.
    assert len(X) == len(y)
    order = np.random.permutation(len(X))
    return X[order], y[order]

# Training set: keep only the three selected classes (default `random_classes`).
x_train, y_train = sample_subset(x_train, y_train)

# Test set: same class filter as the training set.
x_test, y_test = sample_subset(x_test, y_test)
# Hold out 20% of the filtered training data as a validation set (fixed random_state).
# NOTE: the result is assigned back to x_train / y_train, so re-running this cell
# splits the already-reduced training set again.
x_train, x_val, y_train, y_val = sklearn.model_selection.train_test_split(x_train, y_train, test_size = .20, random_state=1)

# Report the resulting shapes of all six arrays.
shapes()

X Train set: (12000, 32, 32, 3)
Y Train set: (12000, 1)

X Val set: (3000, 32, 32, 3)
Y val set: (3000, 1)

X Test set: (3000, 32, 32, 3)
Y Test set: (3000, 1)


#### Vectorization
1. Take the mean over the color channels (this reduces each image from 32 x 32 x 3 down to 32 x 32)
2. Divide by 255 to normalize the pixel values to the range [0, 1]
3. Reshape array to go from 32 x 32 dimension to one dimension (32*32 = 1024) in length
4. Pass this in to the network with input_shape = (32 * 32)

In [20]:
def vectorize(arr):
    '''Turn a batch of colour images into flat, normalised grayscale vectors.

    `arr` is averaged over axis 3 (the colour channels), divided by 255, and
    each remaining 2-D image is flattened to a single row.

    Returns an array of shape (len(arr), height * width).
    '''
    # Channel mean, then scale by 255.
    grey = np.mean(arr, axis=3) / 255.

    # Flatten each image: (n, h, w) -> (n, h * w).
    n_samples = len(grey)
    n_pixels = np.shape(grey)[1] * np.shape(grey)[2]
    return grey.reshape((n_samples, n_pixels))

# Run vectorize over all the x data (train, validation and test).
# NOTE: each array is replaced by its vectorized version, so this cell should be run
# once per kernel; vectorize averages over axis 3, which the flattened arrays no longer have.
print(f'---------------Shape before----------------')
shapes()

x_train = vectorize(x_train)
x_val = vectorize(x_val)
x_test = vectorize(x_test)
print(f'---------------Shape after----------------')
shapes()

---------------Shape before----------------
X Train set: (12000, 32, 32, 3)
Y Train set: (12000, 1)

X Val set: (3000, 32, 32, 3)
Y val set: (3000, 1)

X Test set: (3000, 32, 32, 3)
Y Test set: (3000, 1)
---------------Shape after----------------
X Train set: (12000, 1024)
Y Train set: (12000, 1)

X Val set: (3000, 1024)
Y val set: (3000, 1)

X Test set: (3000, 1024)
Y Test set: (3000, 1)


In [21]:
def vectorize_y(arr, classes=(0, 1, 5)):
    '''Remap the original dataset labels to consecutive class indices.

    Each label in `classes` is replaced by its position in that sequence, so
    the default (0, 1, 5) maps 0 -> 0, 1 -> 1 and 5 -> 2. Labels that do not
    appear in `classes` are left unchanged, which makes the function safe to
    call again on labels that were already remapped.

    Parameters
    ----------
    arr : array-like of integer labels (any shape).
    classes : sequence of original label values, in the order that defines
        their new indices.

    Returns
    -------
    A new array with the same shape as `arr`; the input is not modified.
    '''
    labels = np.asarray(arr)
    remapped = labels.copy()
    for new_index, original_label in enumerate(classes):
        # Masks come from the untouched input so that one remapping can never
        # be overwritten by a later one (e.g. classes=(1, 0)).
        remapped[labels == original_label] = new_index
    return remapped

# Remap the labels of all three splits; the shapes are printed before and after
# to show that only the label values change, not the array shapes.
print(f'---------------Shape before----------------')
shapes()

y_val = vectorize_y(y_val)
y_test = vectorize_y(y_test)
y_train = vectorize_y(y_train)
print(f'---------------Shape after----------------')
shapes()

---------------Shape before----------------
X Train set: (12000, 1024)
Y Train set: (12000, 1)

X Val set: (3000, 1024)
Y val set: (3000, 1)

X Test set: (3000, 1024)
Y Test set: (3000, 1)
---------------Shape after----------------
X Train set: (12000, 1024)
Y Train set: (12000, 1)

X Val set: (3000, 1024)
Y val set: (3000, 1)

X Test set: (3000, 1024)
Y Test set: (3000, 1)


### Building the model

In [57]:
class Node:
    '''A single unit: one weight column vector plus one bias.

    Attributes
    ----------
    weights : array of shape `weight_dim`, initialised uniformly in [0, 1).
    bias : array of shape (1, 1), initialised uniformly in [0, 1).
    '''

    def __init__(self, weight_dim):
        self.weights = np.random.rand(*weight_dim)
        self.bias = np.random.rand(1,1)

    def forward_prop(self, inputs):
        '''Return the pre-activation value `inputs . weights + bias`.'''
        return np.dot(inputs, self.weights) + self.bias

    def backprop(self, dw, db, lr):
        '''Apply one gradient-descent step in place.

        Parameters
        ----------
        dw : gradient for the weights; must hold as many values as `weights`.
        db : gradient for the bias; a scalar or any single-element array.
        lr : learning rate.
        '''
        # Reshape to the parameter shapes so that flat gradients (or a scalar
        # bias gradient) update in place instead of failing to broadcast.
        self.weights -= lr * np.reshape(dw, self.weights.shape)
        self.bias -= lr * np.reshape(db, self.bias.shape)

class Layer:
    '''A fully connected layer made of `units` independent Node objects.

    Every node holds a weight vector of shape (input_dim, 1). `activation` is
    the key of the activation function to apply in `forward` and must be one
    of the keys of `Layer.activations`.
    '''

    def __init__(self,units,input_dim,activation):
        self.weight_dim = (input_dim, 1)
        nodes = []
        for _ in range(units):
            nodes.append(Node(self.weight_dim))
        self.layer = nodes
        self.activation = activation

    # Activation functions (plain functions, looked up through `activations`).

    def softmax(x):
        # Subtract the global maximum before exponentiating for numerical stability;
        # the normalisation is over every element of x.
        exps = np.exp(x - np.max(x))
        return exps / exps.sum()

    def relu(x):
        return np.maximum(0, x)

    def sigmoid(x):
        # A Python list of per-node outputs is converted so that unary minus works.
        if type(x) is list:
            x = np.array(x)
        return 1 / (1 + np.exp(-x))

    activations = {'softmax': softmax, 'relu': relu,'sigmoid': sigmoid}

    def forward(self, inputs):
        '''Run `inputs` through every node and apply the layer activation.

        Returns (A, Z) as arrays: the activations and the pre-activations.
        For softmax both are reshaped to (units, 1); for the other activations
        the per-node outputs are stacked without reshaping.
        '''
        Z = [unit.forward_prop(inputs) for unit in self.layer]
        activate = Layer.activations[self.activation]
        if self.activation == 'softmax':
            n_units = len(self.layer)
            Z = np.array(Z).reshape(n_units, 1)
            A = np.array(activate(Z)).reshape(n_units, 1)
        else:
            A = activate(Z)
        return np.array(A), np.array(Z)

    def get_weights(self):
        '''Return the node weights stacked as a (units, input_dim) matrix.'''
        stacked = np.array([unit.weights for unit in self.layer])
        return stacked.reshape(len(self.layer), self.weight_dim[0])

    def get_bias(self):
        '''Return the node biases stacked as a (units, 1) column.'''
        stacked = np.array([unit.bias for unit in self.layer])
        return stacked.reshape(len(self.layer), 1)

#### Instantiate the Model
1. Hidden Layer with 32 units, input dimension 1024, relu activation
2. Hidden Layer with 16 units, input dimension 32, sigmoid activation
3. Output layer with 3 units, input dimension 16, softmax activation

In [103]:
# Build the three layers. Each layer's input_dim equals the number of units in the
# layer before it (the first layer takes the flattened image length).
# Weights are drawn at random on construction, so re-running this cell resets the model.
H1 = Layer(32,input_dim=x_train.shape[1],activation='relu')
H2 = Layer(16,input_dim=32,activation='sigmoid')
Out = Layer(3,input_dim=16,activation='softmax')

In [107]:
def one_hotencode_y(Y, num_classes=None):
    '''One-hot encode integer class labels.

    Parameters
    ----------
    Y : array-like of non-negative integer labels. Any shape is accepted and
        flattened first, so an (N, 1) label column encodes correctly.
    num_classes : number of classes (rows of the result). Defaults to
        `max(Y) + 1`; pass it explicitly when a split might not contain the
        highest label, so that all splits get the same encoding width.

    Returns
    -------
    Array of shape (num_classes, N) with a single 1 per column. Transpose it
    to get one row per sample.

    Raises
    ------
    ValueError if any label is negative.
    IndexError if a label is >= num_classes.
    '''
    labels = np.asarray(Y).astype(int).ravel()
    if labels.size and labels.min() < 0:
        # A negative index would silently wrap around to the last classes.
        raise ValueError('labels must be non-negative integers')
    if num_classes is None:
        num_classes = int(labels.max()) + 1 if labels.size else 0

    ohey = np.zeros((labels.size, num_classes))
    ohey[np.arange(labels.size), labels] = 1
    return ohey.T

# Loss Function
def loss(softmax_probs, truth_labels):
    '''Categorical cross-entropy (base-2 logarithm) for one sample.

    `truth_labels` is a one-hot vector; its argmax along axis 0 selects the
    entry of `softmax_probs` whose negative log2 is returned.
    '''
    true_class = np.argmax(truth_labels, 0)
    prob_of_truth = softmax_probs[true_class]
    return -1. * np.log2(prob_of_truth)

#Evaluation function
def accuracy(preds, Y):
    '''Return the fraction of entries of `preds` that equal the matching entry of `Y`.'''
    n_matches = np.sum(preds == Y)
    return n_matches / Y.size

def deriv_relu(Z):
    '''Derivative of ReLU: a boolean mask that is True where Z is strictly positive.'''
    positive_mask = Z > 0
    return positive_mask

# Gradient calculation for back-prop
def gradient_calc(W1, W2, W3, Z1, Z2, Z3, A1, A2, A3, X, Y, m, hidden2_deriv=None):
    '''Back-propagate one sample through the three-layer network.

    Parameters
    ----------
    W1, W2, W3 : weight matrices of shape (units, inputs), as returned by
        `Layer.get_weights()`. W1 and Z3 are accepted for interface
        compatibility but are not needed by the computation.
    Z1, Z2, Z3 : pre-activations of the three layers.
    A1, A2, A3 : activations of the three layers (A3 = softmax output).
    X : the input sample.
    Y : the one-hot encoded truth label.
    m : number of samples the gradient is averaged over (1 for a single sample).
    hidden2_deriv : optional callable giving the derivative of the second
        hidden layer's activation with respect to Z2. Defaults to
        `deriv_relu`; pass the sigmoid derivative when that layer is sigmoid.

    Returns
    -------
    (dW1, db1, dW2, db2, dW3, db3) where each dW has the shape of the
    corresponding weight matrix and each db is a (units, 1) column with one
    entry per node.

    Notes
    -----
    The output error uses the softmax / cross-entropy shortcut
    `dZ3 = A3 - Y`, which is the gradient of the natural-log cross-entropy.
    For a loss expressed with log2 the true gradient is this value divided by
    ln(2), a constant factor that only rescales the learning rate.
    The first hidden layer is assumed to be ReLU.
    '''
    def as_column(values):
        # The layers return arrays with extra singleton axes; flatten them all
        # to (n, 1) columns so the matrix products below line up.
        return np.asarray(values, dtype=float).reshape(-1, 1)

    if hidden2_deriv is None:
        hidden2_deriv = deriv_relu

    x_col, y_col = as_column(X), as_column(Y)
    z1, z2 = as_column(Z1), as_column(Z2)
    a1, a2, a3 = as_column(A1), as_column(A2), as_column(A3)

    # Output layer: softmax + cross-entropy gives the error directly.
    dZ3 = a3 - y_col
    dW3 = (1 / m) * dZ3.dot(a2.T)
    db3 = (1 / m) * dZ3

    # Second hidden layer.
    dZ2 = W3.T.dot(dZ3) * hidden2_deriv(z2)
    dW2 = (1 / m) * dZ2.dot(a1.T)
    db2 = (1 / m) * dZ2

    # First hidden layer (ReLU).
    dZ1 = W2.T.dot(dZ2) * deriv_relu(z1)
    dW1 = (1 / m) * dZ1.dot(x_col.T)
    db1 = (1 / m) * dZ1

    return dW1, db1, dW2, db2, dW3, db3

def update_params(**kwargs):
    '''Placeholder for the gradient-descent parameter update (not implemented).

    Accepts arbitrary keyword arguments, ignores them, and returns None.
    '''
    pass
    # TODO: unfinished; no parameters are updated yet.


In [108]:
# One-hot encode the labels of all three splits.
# The guard checks whether y_train is still a single label column: after encoding,
# its second dimension is the number of classes, so re-running the cell only prints
# the message below instead of encoding twice.
# one_hotencode_y returns one column per sample, hence the transpose back to one row per sample.
if y_train.shape[1]==1:
    y_train = one_hotencode_y(np.squeeze(y_train)).T
    y_val = one_hotencode_y(np.squeeze(y_val)).T
    y_test = one_hotencode_y(np.squeeze(y_test)).T
else:
    print('You already ran this cell once')

You already ran this cell once


In [109]:
# Test running forward prop on one training sample:
sample = x_train[0]
truth_label = y_train[0]
# Each layer returns (activations, pre-activations); the activations are transposed
# before being fed to the next layer so their last axis lines up with its weights.
A1, Z1 = H1.forward(sample)
A2, Z2 = H2.forward(A1.T)
A3, Z3 = Out.forward(A2.T)

print(f'Softmax outputs:\n{A3}')
print(f'Truth label:\n{truth_label}')
print(f'Prediction class:\n{np.argmax(A3, 0)}')
print(f'CCE Loss: {loss(A3, truth_label)}')

# Weight matrices, one row per unit.
W1 = H1.get_weights()
W2 = H2.get_weights()
W3 = Out.get_weights()

# Placeholder text printed instead of the (large) weight matrices of layers 2 and 3.
_='n/a'
print(f'Weights layer 1:\n{W1}\nShape:\n{W1.shape}')
print(f'Weights layer 2:\n{_}\nShape:\n{W2.shape}') #just substitute the '_' with W2 if you want to see the weights
print(f'Weights output layer:\n{_}\nShape:\n{W3.shape}')#just substitute the '_' with W3 if you want to see the weights




Softmax outputs:
[[0.54452758]
 [0.02453175]
 [0.43094066]]
Truth label:
[0. 0. 1.]
Prediction class:
[0]
CCE Loss: [1.21443886]
Weights layer 1:
[[0.15632147 0.87619761 0.72472755 ... 0.47131633 0.27373366 0.4943554 ]
 [0.83221198 0.71328989 0.46330544 ... 0.80643965 0.87247379 0.85818246]
 [0.27198979 0.34206889 0.74735344 ... 0.36485997 0.4037032  0.18204509]
 ...
 [0.69474832 0.72438283 0.71311575 ... 0.58424822 0.89795286 0.55872608]
 [0.15574284 0.87970034 0.13368188 ... 0.81603256 0.36605082 0.55940044]
 [0.86840559 0.78870439 0.49854224 ... 0.56653363 0.95256821 0.02360617]]
Shape:
(32, 1024)
Weights layer 2:
n/a
Shape:
(16, 32)
Weights output layer:
n/a
Shape:
(3, 16)


In [122]:
def gradient_descent(x_train, y_train, x_val, y_val, lr, epochs):
    '''Run the (currently forward-only) training loop, then score the validation set.

    Parameters
    ----------
    x_train, y_train : training samples (one row each) and one-hot labels.
    x_val, y_val : validation samples and one-hot labels.
    lr : learning rate. Not used yet because the backward pass is not implemented.
    epochs : number of passes over the training set.

    Returns
    -------
    (val_loss, val_accuracy) : mean cross-entropy loss and fraction of correctly
        classified samples on the validation set, both as floats. Both are NaN
        when the validation set is empty.

    Uses the notebook-level layers H1, H2 and Out and the `loss` function.
    Prints 'Epoch completed' once per epoch.
    '''
    def forward_pass(row):
        # Activations are transposed between layers so that their last axis
        # matches the next layer's weight vectors.
        A1, _Z1 = H1.forward(row)
        A2, _Z2 = H2.forward(A1.T)
        A3, _Z3 = Out.forward(A2.T)
        return A3

    for _epoch in range(epochs):

        for idx, row in enumerate(x_train):
            # Forward prop
            A3 = forward_pass(row)
            cce_loss = loss(A3, y_train[idx])
            # TODO: backward prop and weight update are not implemented yet;
            # the weights are therefore unchanged by training.

        print('Epoch completed')

    # Evaluate the model on the validation set by passing the data through.
    val_losses = []
    n_correct = 0
    for idx, row in enumerate(x_val):
        A3 = forward_pass(row)
        truth = y_val[idx]  # score against the validation labels, not the training labels
        val_losses.append(float(np.mean(loss(A3, truth))))
        predicted_class = int(np.ravel(np.argmax(A3, 0))[0])
        n_correct += int(predicted_class == int(np.argmax(truth)))

    if not val_losses:
        return float('nan'), float('nan')
    return float(np.mean(val_losses)), n_correct / len(val_losses)


In [123]:
# Run a single epoch over the training set, followed by a pass over the validation set.
gradient_descent(x_train, y_train, x_val, y_val, lr=0.01, epochs=1)

Epoch completed
