# Assignment 1
## Still need to implement momentum and mini-batch SGD

In [45]:
import math, random
import numpy as np

In [46]:
# Load in the training set
X_train = np.load('Assignment1-Dataset/train_data.npy')
y_train = np.load('Assignment1-Dataset/train_label.npy')

# # Load in the test set
X_test = np.load('Assignment1-Dataset/test_data.npy')
y_test = np.load('Assignment1-Dataset/test_label.npy')

print(X_train.shape)
print(X_test.shape)

# Normalize (optional)
def normalize(X):
    X = (X - X.mean(axis=0)) / X.std(axis=0)
    return X
X_train = normalize(X_train)

(50000, 128)
(10000, 128)


In [47]:
# Randomly permute [0,N] and extract indices for each fold
def crossval_folds(N, n_folds, seed=1):
    np.random.seed(seed)
    idx_all_permute = np.random.permutation(N)
    N_fold = int(N/n_folds)
    idx_folds = []
    for i in range(n_folds):
        start = i*N_fold
        end = min([(i+1)*N_fold, N])
        idx_folds.append(idx_all_permute[start:end])
    return idx_folds

## Define evaluation function

In [48]:
def eval():

    # Set up
    hidden_layers = [5] # number of nodes in hidden layers i.e. [layer1, layer2, ...]
    lr = 0.05 # learning rate
    n_epochs = 20 # number of training epoch
    batch_size = 256
    N, d = X_train.shape
    n_classes = len(np.unique(y_train))

    print(" Data description --->  X.shape = {}, y.shape = {}, n_classes = {}\n".format(X_train.shape, y_train.shape, n_classes))
    print("Model details:")
    print(" input_dim = {}".format(d))
    print(" hidden_layers = {}".format(hidden_layers))
    print(" output_dim = {}".format(n_classes))
    print(" eta = {}".format(lr))
    print(" n_epochs = {}".format(n_epochs))

    # Train/evaluate the model on each fold
    acc_train, acc_val = list(), list()  # training/test accuracy score

    print("Training model......")
    # Build neural network classifier model and train
    model = NN(input_dim=d, output_dim=n_classes, n_hidden_layer=hidden_layers, batch_size=batch_size) #, seed=seed_weights
    model.train(X_train, y_train, lr=lr, n_epochs=n_epochs)

    # Make predictions for training and test data
    ypred_train = model.predict(X_train)
    ypred_test = model.predict(X_test)

    print("ypred_train", len(ypred_train))
    print(type(ypred_train))
    print(ypred_train[:20])
    print("quantity")
    print(type(y_train))
    print(len(y_train))
    # print("np.sum", np.sum(y_train==ypred_train))
    print()
    # Compute training/test accuracy score from predicted values
    print("Calculating accuracies.....")
    acc_train = accuracy(y_train, ypred_train)
    acc_test = accuracy(y_test, ypred_test)
    # acc_train = np.sum(y_train==ypred_train)/len(y_train)
    # acc_test = np.sum(y_test==ypred_test)/len(y_test)

    print(acc_train)
    print(acc_test)

def accuracy(x, y):
    count = 0
    assert len(x) == len(y)
    for i in range(len(x)):
        if x[i] == y[i]: count += 1
    acc = count / len(x)
    return acc

## Coding the Neural Network

In [51]:
class NN:
    
    def __init__(self, input_dim, output_dim, n_hidden_layer, batch_size):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.batch_size = batch_size
        self.n_hidden_layer = n_hidden_layer
        self.network = self._build_network()

    # < ---- Basic numpy functions ---- > #
    # Sigmoid (activation function)
    def _sigmoid(self, x):
        return 1.0/(1.0+math.exp(-x))

    # Sigmoid derivative
    def _sigmoid_derivative(self, sigmoid):
        return sigmoid*(1.0-sigmoid)

    # One-hot encoding
    def _one_hot_encoding(self, idx, output_dim):
        x = np.zeros(output_dim, dtype=np.int)
        x[idx] = 1
        return x

    # ReLu activation function
    def _relu(self, x):
        return max(0, x)

    # ReLu derivative
    def _relu_derivative(self, x):
        if x < 0: return 0
        else: return 1
    
    # < ---- Construct the Batch Normalization Layer ---- >
    def batch_normalize(self, x):
        norm = np.linalg.norm(x)
        if norm == 0: return x
        else: return x / norm

    def drop_out(self, x, rate):
        set_0 = round(len(x) * rate)
        for i in random.sample(range(len(x)), set_0):
            x[i] = 0
        return x

    # < ---- Building the network architecture ---- > #
    def _build_network(self):
        
        # Create a single fuly connected layer
        def fc_layers(input_dim, output_dim):
            layer = []
            for i in range(output_dim): # Add a weight between each node and unit
                weights= [random.random() for _ in range(input_dim)] # Determine FC layer with randomised/normalized w initialization
                node = {"weight" : weights, 
                         "output": None,
                         "delta": None}
                layer.append(node) # Create the layer
            return layer

        network = [] # Build the network layer by layer
        if len(self.n_hidden_layer) == 0:
            network.append(fc_layers(self.input_dim, self.output_dim))
        else:
            network.append(fc_layers(self.input_dim, self.n_hidden_layer[0]))
            for i in range(1, len(self.n_hidden_layer)):
                network.append(fc_layers(self.n_hidden_layer[i-1], self.n_hidden_layer[i]))
            network.append(fc_layers(self.n_hidden_layer[-1], self.output_dim))

        return network

    # < ---- Training the model ---- > #

    # Training the network
    def train(self, X, y, n_epochs=100, lr=0.005, batch_size=256):
        for epoch in range(n_epochs):
            print("Training {} epoch".format(epoch))
            # ============================================= #
            # < ---- Add in mini-batch training here ---- > #
            # ============================================= #
            n_batches = round(len(y)/batch_size)
            for i in range(n_batches):
                for _X, _y, in zip(X[i*batch_size:(i+1)*batch_size], y[i*batch_size:(i+1)*batch_size]):
                    y_label = self._one_hot_encoding(_y, self.output_dim)
                    self._forward_pass(_X)
                    self._back_propagation(y_label)
                    self._update_weights(_X, lr)

    # Forward-pass function
    def _forward_pass(self, x):
        relu = self._relu
        transfer = self._sigmoid
        x_in = x
        for layer in self.network[:-1]:
            x_out = []
            for node in layer:
                node['output'] = relu(np.dot(node['weight'], x_in)) # Get inner product of the input and weights
                # node['output'] = transfer(self._dotprod(node['weight'], x_in)) # Get inner product of the input and weights
                x_out.append(node['output'])
            x_in = x_out # Pass the output of this layer as the input to the next layer\
        x_out = []
        for node in self.network[-1]:
            node['output'] = transfer(np.dot(node['weight'], x_in))
            x_out.append(node['output'])
        x_in = x_out # Pass the output of this layer as the input to the next layer\

        return x_in


    # Back propagation function
    def _back_propagation(self, y_label):
        transfer_derivative = self._sigmoid_derivative
        n_layers = len(self.network)
        for i in reversed(range(n_layers)):
            # Backpropagate from the output later
            if i == n_layers - 1:
                for j, node in enumerate(self.network[i]):
                    err = node['output'] - y_label[j]
                    node['delta'] = err * transfer_derivative(node['output'])
            else:
                # Weighted sum of gradient from upper layer
                for j, node in enumerate(self.network[i]):
                    err = sum([node_['weight'][j] * node_['delta'] for node_ in self.network[i+1]])
                    node['delta'] = err * self._relu_derivative(node['output'])

    def _update_weights(self, x, lr):
        for i, layer in enumerate(self.network):
            if i == 0:
                inputs = x
            else:
                inputs = [node_['output'] for node_ in self.network[i-1]]

                # Update weights
                for node in layer:
                    for j, inpt in enumerate(inputs):
                        node['weight'][j] -= node['delta'] * lr * inpt

    # < ---- Making predictions ---- #
    def predict(self, x):
        pred = np.array([np.argmax(self._forward_pass(_x)) for _x in x], dtype=np.int)
        return pred

    # < ---- Define a new optimizer here ---- > #
    def optimizer():
        pass

## Evaluating the model

In [52]:
eval()

 Data description --->  X.shape = (50000, 128), y.shape = (50000, 1), n_classes = 10

Model details:
 input_dim = 128
 hidden_layers = [5]
 output_dim = 10
 eta = 0.05
 n_epochs = 20
Training model......
Training 0 epoch
Training 1 epoch
Training 2 epoch
Training 3 epoch
Training 4 epoch
Training 5 epoch
Training 6 epoch
Training 7 epoch
Training 8 epoch
Training 9 epoch
Training 10 epoch
Training 11 epoch
Training 12 epoch
Training 13 epoch
Training 14 epoch
Training 15 epoch
Training 16 epoch
Training 17 epoch
Training 18 epoch
Training 19 epoch
ypred_train 50000
<class 'numpy.ndarray'>
[2 6 1 2 7 3 6 6 6 6 1 6 6 6 6 6 6 6 6 1]
quantity
<class 'numpy.ndarray'>
50000

Calculating accuracies.....
0.11668
0.1228
