In [18]:
import numpy as np
from scipy.io import loadmat
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [210]:
class NeuralNetwork:
    
    def __init__(self,layer_size_list,train_dataset,test_dataset,batch_size):
        self.num_layers = len(layer_size_list)
        self.layer_size_list = layer_size_list
        self.batch_size = batch_size
        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        np.random.seed(101)
        self.weight_list = [np.random.randn(layer_size_list[i+1],layer_size_list[i]) for i in range(len(layer_size_list)-1)]
        #self.single_bias_list = [np.random.randn(x,1) for x in layer_size_list[1:]]
        self.bias_list = self.initalize_biases()
        #self.last_batch_biases = []
        self.no_of_layers = len(layer_size_list)

    def initalize_biases(self):
        bias_list = [np.empty((x.shape[0],self.batch_size)) for x in self.single_bias_list]
        for i,b in enumerate(bias_list):
            b[:,:] = self.single_bias_list[i]
        return bias_list
        
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))

    def feedforward(self,mini_batch):
        activations_list = [0] * self.no_of_layers
        activations_list[0] = mini_batch.T
        prev_layer_activations = activations_list[0]
        for layer in range(1,self.no_of_layers):     # l= 2 to L
            WX = np.dot(self.weight_list[layer-1],prev_layer_activations)   
            bias_matrix = np.empty((WX.shape))
            bias_matrix[:,:] = self.bias_list[layer - 1]
            Z = WX + self.bias_list[layer-1]
            sigmoid_Z = self.sigmoid(Z)
            activations_list[layer] = sigmoid_Z
            prev_layer_activations = sigmoid_Z
        return activations_list

    def delta_output_layer(self,mini_batch,mini_batch_labels):
        deltas_list = [0] * self.no_of_layers
        true_labels_matrix = np.zeros((mini_batch.shape[0],self.layer_size_list[-1]))
        for index,row in enumerate(mini_batch):
            true_labels_matrix[index][int(mini_batch_labels[index])-1] = 1
        activations_list = self.feedforward(mini_batch[:,:400])
        output_layer_activations = activations_list[-1]
        sigma_prime_output_layer = output_layer_activations * (1 - output_layer_activations)
        delta_output_layer = (output_layer_activations - true_labels_matrix.T) * sigma_prime_output_layer
        deltas_list[self.no_of_layers - 1] = delta_output_layer
        return deltas_list,activations_list
             
    def backpropagate(self,mini_batch,mini_batch_labels):
        deltas_list,activations_list = self.delta_output_layer(mini_batch,mini_batch_labels)
        upstream_gradient = deltas_list[-1]
        for layer in range(self.no_of_layers,1,-1):
            downstream_gradient = np.dot(self.weight_list[layer-2].T,upstream_gradient) * (activations_list[layer-2]*(1 - activations_list[layer-2]))
            deltas_list[layer-2] = downstream_gradient
            upstream_gradient = downstream_gradient
        return deltas_list,activations_list
            
    def SGD(self,learning_rate):
        lr = learning_rate
        """--------------- Generate Mini Batches -----------------"""

        for batch in range(0,len(self.train_dataset),self.batch_size):
            if batch > len(self.train_dataset) - self.batch_size:
                mini_batch = self.train_dataset[batch:][:,:400]
                mini_batch_labels = self.train_dataset[batch:][:,400]
            else:
                mini_batch = self.train_dataset[batch:batch + self.batch_size][:,:400]
                mini_batch_labels = self.train_dataset[batch:batch + self.batch_size][:,400]
            print(mini_batch.shape)
            deltas_list,activations_list = self.backpropagate(mini_batch,mini_batch_labels)
            
            for layer in range(self.no_of_layers,1,-1):
                self.weight_list[layer-2] -= lr * np.dot(deltas_list[layer-1],activations_list[layer-2].T)
                #print('self.bias_list[layer-2]',self.bias_list[layer-2].shape)
                #print('deltas_list[layer-1]',deltas_list[layer-1].shape)
                if batch > len(self.train_dataset) - self.batch_size:
                    print(self.last_batch_biases[0].shape)
                    print(self.last_batch_biases[1].shape)
                    self.last_batch_biases[layer-2] -= lr * deltas_list[layer-1]
                else:
                    self.bias_list[layer-2] -= lr * deltas_list[layer-1]
        
                    
    def evaluate(self,test_dataset):
        prev_layer_activations = test_dataset.T
        for index,weight in enumerate(self.weight_list):
            WX= np.dot(weight,prev_layer_activations)
            bias_matrix = np.empty((WX.shape))
            bias_matrix[:,:] = self.bias_list[index]
            Z = WX + bias_matrix
            sigmoid_Z = self.sigmoid(Z)
            prev_layer_activations = sigmoid_Z7                       

In [211]:
dataset_mat = loadmat(r'D:\\Courses\\Fall 19\\ELEG 815 Statistical Learning\\HW7\DatasetDigit.mat')
data = np.array(dataset_mat['X'])
labels = np.array(dataset_mat['y'])
dataset = np.concatenate((data,labels),axis=1)
dataset = shuffle(dataset)
train_dataset, test_dataset = train_test_split(dataset, test_size=0.20, random_state=42)
NN = NeuralNetwork([400,25,10],train_dataset, test_dataset,64)
NN.SGD(learning_rate= 2)

(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(64, 400)
(32, 400)
(25, 32)
(10, 32)
(25, 32)
(10, 32)
