In [42]:
import numpy as np
from scipy.special import expit
import pandas as pd
import math

class Neural_Network():
    
    def sigmoid(self,z):
        return 1.0/(1.0 + np.exp(-z))

    def sigmoidGradient(self,z):
        return self.sigmoid(z)*(1-self.sigmoid(z))
    
    def regularization(self, lamda, m):
        lamda_val = lamda/(2.0*m)
        theta1_sum = 0 
        theta2_sum = 0
        for j in range(len(self.Theta1)-1):
            for k in range(self.Theta1[0].size-1):
                theta1_sum += self.Theta1[j+1][k+1]*self.Theta1[j+1][k+1]
        for j in range(len(self.Theta2)-1):
            for k in range(self.Theta2[0].size-1):
                theta2_sum += self.Theta2[j+1][k+1]*self.Theta2[j+1][k+1]
        return lamda_val*(theta1_sum+theta2_sum)
    
    def calc_cost(self, y_vals, hyp, lamda, m): #hyp and y are both 10x1 vectors 
        cost = 0
        for k in range(y_vals.size):
            cost += -y_vals[k] * math.log(hyp[k]) - (1-y_vals[k])*math.log(1-hyp[k])
        return cost
    
    def nnCostFunction(self, nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lambda_reg):

        #Theta1 = np.reshape(nn_params[:hidden_layer_size * (input_layer_size + 1)], (hidden_layer_size, input_layer_size + 1), order='F')
        self.Theta1 = nn_params[0]
        #Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):], (num_labels, hidden_layer_size + 1), order='F')
        self.Theta2 = nn_params[1]
        m = len(X)
        labels = y.flatten()
        # set y to be matrix of size m x k
        y = np.zeros((m,num_labels))
        # for every label, convert it into vector of 0s and a 1 in the appropriate position
        for i in range(m): #each row is new training sample
            index = int(labels[i]-1)
            y[i][index] = 1
        J = 0;
        Theta1_grad = np.zeros_like(self.Theta1)
        Theta2_grad = np.zeros_like(self.Theta2)

        # add column of ones as bias unit from input layer to second layer
        X = np.hstack((np.ones((m,1)), X)) # = a1
        
        #Forward and Back prop: 

        bigDelta1 = 0
        bigDelta2 = 0
        cost_temp = 0

        # for each training example
        for t in range(m):


            ## step 1: perform forward pass
            # set lowercase x to the t-th row of X
            x = X[t]
            # note that uppercase X already included column of ones 
            # as bias unit from input layer to second layer, so no need to add it

            # calculate second layer as sigmoid( z2 ) where z2 = Theta1 * a1
            z2 = np.matmul(x,self.Theta1.T)
            a2 = self.sigmoid(z2)

            # add column of ones as bias unit from second layer to third layer
            a2 = np.concatenate((np.array([1]), a2))
            # calculate third layer as sigmoid (z3) where z3 = Theta2 * a2
            z3 = np.matmul(a2,self.Theta2.T)
            a3 = self.sigmoid(z3)

            ## step 2: 
            delta3 = np.zeros((num_labels))

            #subtract actual val in y from each hypothesized val in a3
            y_vals = np.zeros((num_labels))
            for k in range(num_labels): #for each of the 10 labels subtract
                y_k = y[t][k]
                y_vals[k] = y_k
                delta3[k] = a3[k] - y_k\

            ## step 3: for the hidden layer l=2, set delta2 = Theta2' * delta3 .* sigmoidGradient(z2)
            # note that we're skipping delta2_0 (=gradients of bias units, which we don't use here)
            delta2 = np.matmul(self.Theta2[:,1:].T, delta3) * self.sigmoidGradient(z2)

            ## step 4: accumulate gradient from this example
            # accumulation
            bigDelta1 += np.outer(delta2, x)
            bigDelta2 += np.outer(delta3, a2)

            cost_temp += self.calc_cost(y_vals, a3, lambda_reg, m)
        term1 = (1/m)*cost_temp
        term2 = self.regularization(lambda_reg, m)
        J = term1 + term2
        print("Cost is: " + str(J))
        # step 5: obtain gradient for neural net cost function by dividing the accumulated gradients by m
        Theta1_grad = bigDelta1 / m
        Theta2_grad = bigDelta2 / m
        print("Theta1 Grad:")
        print(Theta1_grad)
        print("Theta2 Grad:")
        print(Theta2_grad)
        

        #% REGULARIZATION FOR GRADIENT
        # only regularize for j >= 1, so skip the first column
        Theta1_grad_unregularized = np.copy(Theta1_grad)
        Theta2_grad_unregularized = np.copy(Theta2_grad)
        Theta1_grad += (float(lambda_reg)/m)*self.Theta1
        Theta2_grad += (float(lambda_reg)/m)*self.Theta2
        Theta1_grad[:,0] = Theta1_grad_unregularized[:,0]
        Theta2_grad[:,0] = Theta2_grad_unregularized[:,0]
        

df = pd.read_csv(r'/Users/elliesuit/emnist/Emnist Data/Theta1.csv', header = None)
df2 = pd.read_csv(r'/Users/elliesuit/emnist/Emnist Data/Theta2.csv', header = None)
df3 = pd.read_csv(r'/Users/elliesuit/emnist/Emnist Data/X.csv', header = None)
df4 = pd.read_csv(r'/Users/elliesuit/emnist/Emnist Data/Y.csv', header = None)
theta1 = np.zeros([25,401])
theta2 = np.zeros([10,26])
x = np.zeros([5000,400])
y = np.zeros([5000,1])
index = 0 
while (index < 25):
    theta1[index] = df.iloc[index]
    index+=1

index = 0
while(index<10):
    theta2[index] = df2.iloc[index]
    index+=1

index = 0
while(index<5000):
    x[index] = df3.iloc[index]
    index+=1
ones = np.ones((5000,1))
#x = np.hstack((ones, Xtemp)) #this is a1 (67) 

index = 0
while (index<5000):
    y[index] = df4.iloc[index]
    index+=1

nn_params = [theta1, theta2] 
input_layer_size = 400
hidden_layer_size = 25
num_labels = 10
lambda_val = 1 
n = Neural_Network()
n.nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, x, y, lambda_val)
    
    

Cost is: 0.3783864266414258
Theta1 Grad:
[[ 6.18712551e-05  0.00000000e+00  0.00000000e+00 ...  9.70111000e-09
   2.85541444e-09  0.00000000e+00]
 [ 9.38798337e-05  0.00000000e+00  0.00000000e+00 ...  3.22774694e-08
  -1.26324304e-10  0.00000000e+00]
 [-1.92593582e-04  0.00000000e+00  0.00000000e+00 ...  7.05404527e-08
   1.41585607e-09  0.00000000e+00]
 ...
 [ 6.60569195e-05  0.00000000e+00  0.00000000e+00 ... -1.40472250e-08
   1.94786063e-09  0.00000000e+00]
 [ 2.90522067e-04  0.00000000e+00  0.00000000e+00 ...  5.06149566e-07
  -5.54722531e-08  0.00000000e+00]
 [-6.33752805e-05  0.00000000e+00  0.00000000e+00 ...  5.05494773e-09
   4.46821988e-09  0.00000000e+00]]
Theta2 Grad:
[[ 6.28737627e-04  7.50946267e-04  9.87964528e-05  1.48819864e-03
   7.31802071e-04  1.38113759e-03 -1.59325433e-04 -6.68870890e-04
  -1.24979364e-03 -9.66226119e-05  7.19244379e-04 -5.10976190e-04
   1.11120642e-03 -6.43551922e-04 -6.95182479e-04 -9.47091619e-04
   2.00794716e-04  9.50724937e-04 -5.42000289e