# Testing the dense NN for classification using the MNIST image data

## Author: Bojian Xu, bojianxu@ewu.edu

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nn
import sys
sys.path.append('..')

class MyUtils:
    def normalize_0_1(X):
        ''' Normalize the value of every feature into the [0,1] range, using formula: x = (x-x_min)/(x_max - x_min)
            1) First shift all feature values to be non-negative by subtracting the min of each column 
               if that min is negative.
            2) Then divide each feature value by the max of the column if that max is not zero. 
            
            X: n x d matrix of samples, excluding the x_0 = 1 feature. X can have negative numbers.
            return: the n x d matrix of samples where each feature value belongs to [0,1]
        '''

        n, d = X.shape
        X_norm = X.astype('float64') # Have a copy of the data in float

        for i in range(d):
            col_min = min(X_norm[:,i])
            col_max = max(X_norm[:,i])
            gap = col_max - col_min
            if gap:
                X_norm[:,i] = (X_norm[:,i] - col_min) / gap
            else:
                X_norm[:,i] = 0 #X_norm[:,i] - X_norm[:,i]
        
        return X_norm
    def normalize_neg1_pos1(X):
        ''' Normalize the value of every feature into the [-1,+1] range. 
            
            X: n x d matrix of samples, excluding the x_0 = 1 feature. X can have negative numbers.
            return: the n x d matrix of samples where each feature value belongs to [-1,1]
        '''

        n, d = X.shape
        X_norm = X.astype('float64') # Have a copy of the data in float

        for i in range(d):
            col_min = min(X_norm[:,i])
            col_max = max(X_norm[:,i])
            col_mid = (col_max + col_min) / 2
            gap = (col_max - col_min) / 2
            if gap:
                X_norm[:,i] = (X_norm[:,i] - col_mid) / gap
            else: 
                X_norm[:,i] = 0 #X_norm[:,i] - X_norm[:,i]

        return X_norm

    
    def z_transform(X, degree = 2):
        
        ''' Transforming traing samples to the Z space
            X: n x d matrix of samples, excluding the x_0 = 1 feature
            degree: the degree of the Z space
            return: the n x d' matrix of samples in the Z space, excluding the z_0 = 1 feature.
            It can be mathematically calculated: d' = \sum_{k=1}^{degree} (k+d-1) \choose (d-1)

        '''
 
        # Set r to degree
        r = degree
        
        # degree $leq$ 1, return x 
        if r <= 1:
            return X
        
        # n is the number of X's rows --> The number of points
        # d is the number of X's cols --> The dimensionality 
        n,d = np.shape(X)
        
        # Z is going to be a copy of x = Starts out exactly the same 
        Z = X.copy()
        
        
        
        # next it is necessary to create all of the buckets
        # a bucket is a matrix with all the possible combinations of multiplications which acheives a certain, single degree 
        # the # of buckets is conceptuall known d -r -1 Choose d - 1 
        # let's save those numbers in an array 
        
        #there will b r buckets 
        
        # B is a list with a bunch of buckets  
        B = []
        
        
        # the number of buckets 
        for i in range(r):
            # append a number - the ith bucket size which can be calculated w/ this equation
            # math.comb = n choose k 
            m = d+i # 0-based indexing t.f. the -1 is gone, d is the size of the X matrix 
            k = d-1 
            B.append(math.comb(m,k))
    
   
        ell = np.arange(np.sum(B)) # The summation of all the elements in the B array

        q = 0 # the total size of all of the buckets before the previous bucket
        
        p = d # the size of the previous bucket
        g = p
        
        # at the beginning, there is one bucket 
        for i in range(1, r): # 1, 2, 3, ... r-1 
            
            # create each bucket up to the ith bucket, visit the previous bucket 

            # go through every element in the previous bucket - the range starting from q going to q+p 
            for j in range(q, p):
                head = ell[j]

        
                # this tracks the index of the new column
           
            
                # go from head to lexographically highest feature
                for k in range(head, d):

                    #elementwise multiplication
                    temp = (Z[: ,j] * X[:, k]).reshape(-1,1)
                    # insert new column temp on right side
                    Z = np.append(Z, temp, axis=1)
                    
                    # j is hte index of the column you are currently computing
                    ell[g] = k # just multiplied w/ x's k column

                    g += 1

            # adding previous bucket into p the new previous buck
            q = p 

            # the new previous bucket is going to be i which is the current i but will soon be updated 
            p += B[i] 
 

        
        assert Z.shape[1] == np.sum(B)
        
        return Z
    
    


In [2]:
k = 10  #number of classes
d = 784 #number of features, excluding the bias feature

In [3]:
# READ in data
df_X_train = pd.read_csv('MNIST/x_train.csv', header=None)
df_y_train = pd.read_csv('MNIST/y_train.csv', header=None)
df_X_test = pd.read_csv('MNIST/x_test.csv', header=None)
df_y_test = pd.read_csv('MNIST/y_test.csv', header=None)

# save in numpy arrays
X_train_raw = df_X_train.to_numpy()
y_train_raw = df_y_train.to_numpy()
X_test_raw = df_X_test.to_numpy()
y_test_raw = df_y_test.to_numpy()



# get training set size
n_train = X_train_raw.shape[0]
n_test = X_test_raw.shape[0]

# normalize all features to [0,1]
X_all = MyUtils.normalize_0_1(np.concatenate((X_train_raw, X_test_raw), axis=0))
X_train = X_all[:n_train]
X_test = X_all[n_train:]


# convert each label into a 0-1 vector
y_train = np.zeros((n_train, k))
y_test = np.zeros((n_test, k))
for i in range(n_train):
    y_train[i,int(y_train_raw[i])] = 1.0
for i in range(n_test):
    y_test[i,int(y_test_raw[i])] = 1.0


print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
#print(y_test)

(60000, 784)
(60000, 10)
(10000, 784)
(10000, 10)


In [4]:
#print(y_train[-10:])

In [15]:
# build the network
nuts = NeuralNetwork()

nuts.add_layer(d = d)  # input layer - 0

nuts.add_layer(d = 100, act = 'relu')  # hidden layer - 1
nuts.add_layer(d = 30, act = 'relu')  # hiddent layer - 2
#nuts.add_layer(d = 100, act = 'relu')  # hiddent layer - 3
#nuts.add_layer(d = 30, act = 'relu')  # hiddent layer - 4

nuts.add_layer(d = k, act = 'logis')  # output layer,    multi-class classification, #classes = k

In [17]:
nuts.fit(X_train, y_train, eta = 0.1, iterations = 10, SGD = True, mini_batch_size = 20)

X_star:  (20, 10)
Y:  (20, 10)
X_star:  (20, 10)
Y:  (20, 10)
X_star:  (20, 10)
Y:  (20, 10)
X_star:  (20, 10)
Y:  (20, 10)
X_star:  (20, 10)
Y:  (20, 10)
X_star:  (20, 10)
Y:  (20, 10)
X_star:  (20, 10)
Y:  (20, 10)
X_star:  (20, 10)
Y:  (20, 10)
X_star:  (20, 10)
Y:  (20, 10)
X_star:  (20, 10)
Y:  (20, 10)


In [18]:
print(nuts.error(X_train, y_train))
print(nuts.error(X_test, y_test))

[0.83838333]
[0.8418]


In [7]:
preds = nuts.predict(X_test)

In [8]:
#print(preds[:100])
#print(y_test_raw[:100])
print(np.sum(preds != y_test_raw))

10000


In [9]:
for i in range(y_test.shape[0]):
    if preds[i] != y_test_raw[i]:
        print('misclassified!!')
    print('predicted as', preds[i])
    print('label is', y_test_raw[i])
    pixels = X_test_raw[i].reshape((28, 28))
    plt.imshow(pixels, cmap='gray')
    plt.show()

TypeError: 'NoneType' object is not subscriptable

In [6]:
# Jordan Driscoll 905812

## delete the `pass` statement in every function below and add in your own code. 


import numpy as np



# Various math functions, including a collection of activation functions used in NN.




class MyMath:

    def tanh(x):
        ''' tanh function. 
            Support vectorized operation

            x: an array type of real numbers
            return: the numpy array where every element is tanh of the corresponding element in array x
        '''
        v_tanh = np.vectorize(np.tanh)
        return v_tanh(x)
   

    
    def tanh_de(x):
        ''' Derivative of the tanh function. 
            Support vectorized operation

            x: an array type of real numbers
            return: the numpy array where every element is tanh derivative of the corresponding element in array x
        '''
        v_tanh_de = np.vectorize(lambda x: 1 - (np.tanh(x) ** 2))
        return v_tanh_de(x)

    
    
    def logis(x):
        ''' Logistic function. 
            Support vectorized operation

            x: an array type of real numbers
            return: the numpy array where every element is logistic of 
                    the corresponding element in array x
        '''
        v_sigmoid = np.vectorize(lambda x: 1 / (1 + np.exp(-x)))
        return v_sigmoid(x)
         
        
        

    
    def logis_de(x):
        ''' Derivative of the logistic function. 
            Support vectorized operation

            x: an array type of real numbers
            return: the numpy array where every element is logistic derivative of 
                    the corresponding element in array x
        '''
        # The sigmoid function
       
        # The vectorized sigmoid function
        sigmoid = lambda s: 1 / (1 + np.exp(-s))
        
        v_sigmoid = np.vectorize(lambda x: sigmoid(x) * (1 - sigmoid(x)))
        
        return v_sigmoid(x)
        

    
    def iden(x):
        ''' Identity function
            Support vectorized operation
            
            x: an array type of real numbers
            return: the numpy array where every element is the same as
                    the corresponding element in array x
        '''
        return np.array(x) 
        

    
    def iden_de(x):
        ''' The derivative of the identity function 
            Support vectorized operation

            x: an array type of real numbers
            return: the numpy array of all zeros of the same shape of x.
        '''
        return np.ones(np.array(x).shape)
        

    
    def relu(x):
        ''' The ReLU function 
            Support vectorized operation

            x: an array type of real numbers
            return: the numpy array where every element is the max of: zero vs. the corresponding element in x.
        '''
        v_relu = np.vectorize(lambda x: x * (x > 0))
        return v_relu(x)
    
    

    
    def _relu_de_scaler(x):
        ''' The derivative of the ReLU function. Scaler version.
        
            x: a real number
            return: 1, if x > 0; 0, otherwise.
        '''
        return 1 if x > 0 else 0 

    
    def relu_de(x):
        ''' The derivative of the ReLU function 
            Support vectorized operation

            x: an array type of real numbers
            return: the numpy array where every element is the _relu_de_scaler of the corresponding element in x.   
        '''
        v_relu_de = np.vectorize(MyMath._relu_de_scaler)
        return v_relu_de(x)


In [14]:
# Jordan Driscoll 905812





# Implementation of the forwardfeed neural network using stachastic gradient descent via backpropagation
# Support parallel/batch mode: process every (mini)batch as a whole in one forward-feed/backtracking round trip. 



import numpy as np
import math
import math_util as mu
import nn_layer


class NeuralNetwork:
    
    
   
    
    def __init__(self):
        self.layers = []     # the list of L+1 layers, including the input layer. 
        self.L = -1          # Number of layers, excluding the input layer. 
                             # Initting it as -1 is to exclude the input layer in L.

    
    
    def add_layer(self, d = 1, act = 'tanh'):
        ''' The newly added layer is always added AFTER all existing layers.
            The firstly added layer is the input layer.
            The most recently added layer is the output layer. 
            
            d: the number of nodes, excluding the bias node, which will always be added by the program. 
            act: the choice of activation function. The input layer will never use an activation function even if it is given. 
            
            So far, the set of supported activation functions are (new functions can be easily added into `math_util.py`): 
            - 'tanh': the tanh function
            - 'logis': the logistic function
            - 'iden': the identity function
            - 'relu': the ReLU function
        '''
        # Create the new neural layer using the passed in variables
        new_layer = NeuralLayer(d, act)
        # Add the layers to the list of layers
        self.layers.append(new_layer)
        # Add the layer index
        self.L += 1
        
    

    def _init_weights(self):
        ''' Initialize every layer's edge weights with random numbers from [-1/sqrt(d),1/sqrt(d)], 
            where d is the number of nonbias node of the layer
        '''
        
        weight_rng= np.random.default_rng(2142)
        # We go through the layers excluding the first one 
        for layer_id in range(1, self.L + 1): 
            
            curr_layer = self.layers[layer_id]
            prev_layer = self.layers[layer_id - 1]
           
            # go through and create the w for each l 
            layer_n = prev_layer.d + 1 # number of nodes this layer, +1 is included as bias  
            layer_d = curr_layer.d # number of connections this layer
            
            range_min = -1 / math.sqrt(layer_d)
            range_max = 1 / math.sqrt(layer_d)
            
            curr_layer.W = weight_rng.uniform(low=range_min, high=range_max, size=(layer_n,layer_d))
            
            # curr_layer.W = np.random.uniform(low=range_min, high=range_max, size=(layer_n,layer_d))
            
            
            # Create a Matrix of Zeros of Size l_n, l_d 

    
    
    ############### Update Final Delta and Gradient ############################
    def _update_final_gradient_and_delta(self, c, Y):
        
        final_layer = self.layers[self.L] 
        
        transform_str = "ij,ik->jk"
        
        # Delta^L --> Matrix of all of the vectors of all of the partial derivatives of the errors
        # D_L = 2 * (X* - Y) * derrived activation(S)            
        # The final X without the bias 
        X_star = final_layer.X[:, 1:]
        
        # The final S 
        S_final = final_layer.S
        
        # The final Delta
        a = 2 * (X_star - Y)
        print("X_star: ", X_star.shape)
        print("Y: ", Y.shape)
        b = final_layer.act_de(S_final)
        
        Delta_final = a * b
                
        final_layer.Delta = Delta_final
        
        ############### OBATINING G ###########################
        # G is the gradient --> This is the line of ups & downs
        # Second to last layer's X 
        A = self.layers[self.L - 1].X
        
        B = Delta_final
        
        # Update the final G
        final_layer.G = c * np.einsum(transform_str, A, B)

    
    def _backpropogate(self, layer_id, c):
        # The previous layer 
        p_layer = self.layers[layer_id - 1]
        
        # The Current Layer
        layer = self.layers[layer_id]
        
        #The next layer
        n_layer = self.layers[layer_id + 1]
        
        # Obtain the S with the derived activation function applied to it 
        S = layer.act_de(layer.S)
        
        # The W in the equation w/out bias 
        W = n_layer.W[1:].T 
        
        # The delta is the next layer's delta
        d = n_layer.Delta
        
        layer.Delta = S * (d @ W)
        
        # X is the previous layer's X
        A = p_layer.X
        
        B = layer.Delta
        
        transform_str = "ij,ik->jk"
        
        # Calculate Gradient from each layer 
        layer.G = c * np.einsum(transform_str, A, B)
    
    
    def _update_minibatch(self, d):
        # update c 
        d['c'] += d['m']
        # update end 
        d['e'] = d['c']+ d['m']
        # set s to c 
        d['s'] = d['c']
        if(d['n'] == d['m']):
            d['s'] = 0
            d['c'] = 0 
            d['e'] = d['n']
        else:
            # if e's too big but c isn't 
            if(d['e'] >= d['n'] and d['c'] <= d['n']):
                d['s'] = d['c']
                d['c'] = -d['m']
                d['e'] = d['n']
                
            # if e is too big 
            # e > n 
            if(d['e'] > d['n']):
                # c = 0
                d['c'] = 0
                # e = c + m
                d['e'] = d['c'] + d['m']
        
            return d
        
    # Prediction Should Be (97%)
    def fit(self, X, Y, eta = 0.01, iterations = 1000, SGD = True, mini_batch_size = 1):
        ''' Find the fitting weight matrices for every hidden layer and the output layer. Save them in the layers.
          
            X: n x d matrix of samples, where d >= 1 is the number of features in each training sample
            Y: n x k vector of lables, where k >= 1 is the number of classes in the multi-class classification
            eta: the learning rate used in gradient descent
            iterations: the maximum iterations used in gradient descent
            SGD: True - use SGD; False: use batch GD
            mini_batch_size: the size of each mini batch size, if SGD is True.  
        '''
        print("Initial Y Shape: ", Y.shape)
        
        self._init_weights()  # initialize the edge weights matrices with random numbers.
        
        shuffle = True
            
        # The first layer
        first_layer = self.layers[0]
        
        # Obtain the shape of n 
        n, d = X.shape
        
        if(not SGD):
            mini_batch_size = n        
        
        m = mini_batch_size
        
        # Shuffle X * y 
        if(shuffle):
            # Makes it such that X & y are shuffled with each other 
            Xy = np.append(X, Y, axis=1)
            # shuffles the Xy array 
            np.random.shuffle(Xy)
                
            # extracts y 
            Y = Xy[:, d:]
                
            # extracts X 
            X = Xy[:, :d]

        
        # This stores all of the necessary data for the minibatch
        minibatch_data = dict({'n':n, 'c':0, 'm':mini_batch_size,'e':mini_batch_size,'s':0})
        
        
        
        
        
        # Run for the number of times that are available
        for iteration in range(iterations):           
            
            X_mini = X[minibatch_data['c']:minibatch_data['e']]
            y_mini = Y[minibatch_data['c']:minibatch_data['e']]
            print("Y mini shape: ", y_mini.shape)
            c = 1 / X_mini.shape[0]
            
            # Add Bias Column to the 0th layer's X 
            first_layer.X = np.insert(X_mini, 0, 1, axis=1)
        
            
            
            ######### FORWARD FEEDING #################
            # For Each Layer Excluding the First One 
            for layer_id in range(1, self.L + 1):       
                self._Forward_Feed(layer_id)    
                
     

            ############ UPDATE FINAL G & D #################
            # Update the final gradient and Delta
            self._update_final_gradient_and_delta(c, y_mini)

            

            
            ############## BACKPROPOGATION #######################
            # Then go through and update all the delta previous to that
            # Back Propogation: Starting from the back going to the front
            for layer_id in reversed(range(1, self.L)):
                self._backpropogate(layer_id, c)
                
                
            
            ############## UPDATE THE WEIGHTS ################
            for layer_id in range(1, self.L + 1):
                layer = self.layers[layer_id]
                layer.W -= eta * layer.G
         
            # Adjust 
            if(SGD):
                minibatch_data = self._update_minibatch(minibatch_data)
            
            
            
            
            
            
            
        
        
        # I will leave you to decide how you want to organize the rest of the code, but below is what I used and recommend. Decompose them into private components/functions. 

        ## prep the data: add bias column; randomly shuffle data training set. 

        ## for every iteration:
        #### get a minibatch and use it for:
        ######### forward feeding
        ######### calculate the error of this batch if you want to track/observe the error trend for viewing purpose.
        ######### back propagation to calculate the gradients of all the weights
        ######### use the gradients to update all the weight matrices. 

        pass

    
    def _Forward_Feed(self, layer_id): 
        
        # the previous layer
        p_layer = self.layers[layer_id - 1]
            
        # the current layer 
        layer = self.layers[layer_id]
        
        # Set the S 
        S = p_layer.X @ layer.W
        
        # Run the activation function on S
        layer.X = layer.act(S)
        
        # Add bias to the X 
        layer.X = np.insert(layer.X, 0, 1, axis=1)
        
        # Set the layer's S 
        layer.S = S 
        

        
    def predict(self, X):
        ''' X: n x d matrix, the sample batch, excluding the bias feature 1 column.
            
            return: n x 1 matrix, n is the number of samples, every row is the predicted class id.
         '''
        # Get X to the right shape 
        # Set as an np array
        X = np.array(X)
        
        X = X.reshape(-1, self.layers[0].d)
        
        # Add bias column
        X = np.insert(X, 0, 1, axis=1)
        
        # Take the first layer and make it's inputted X the sample X being predicted
        self.layers[0].X = X
        
        # go through layer excluding the first one 
        for layer_id in range(1, self.L + 1):
                       
            self._Forward_Feed(layer_id)
            
            
        
        # Take the final layer 
        final_layer = self.layers[self.L]
        
        # Knock off the bias column
        final_layer.X = final_layer.X[:, 1:]
        
        # Then return an n x 1 with the arg max's
        out = np.argmax(final_layer.X, axis=1)
        
        out = out.reshape(-1, 1)
        
        return out 
         
            
             
            
          
            
            
            
            
            
        
        return _Forward_Feed(X)
    
        
    
    def error(self, X, Y):
        ''' X: n x d matrix, the sample batch, excluding the bias feature 1 column. 
               n is the number of samples. 
               d is the number of (non-bias) features of each sample. 
            Y: n x k matrix, the labels of the input n samples. Each row is the label of one sample, 
               where only one entry is 1 and the rest are all 0. 
               Y[i,j]=1 indicates the ith sample belongs to class j.
               k is the number of classes. 
            
            return: the percentage of misclassfied samples
            
        '''
        
        n, d = X.shape
        
        # Find the nx1 predicition matrix 
        preds = self.predict(X)
        
        # Take all of the maximum's from the Y matrix - results in an nx1 matrix
        y = np.argmax(Y, axis=1) 
        
        y = y.reshape(-1, 1) 
        
        # Return the summation of the number of equal args
        err = sum(preds != y)
        
        # # of err / # of samples
        return err / n
   

In [8]:
# Author: Bojian Xu, bojianxu@ewu.edu


# Implementation of one layer used in the forward feeding and back propagation neural network 

import numpy as np
import math_util as mu

class NeuralLayer:
    def __init__(self, d = 1, act = 'tanh'):
        ''' d: the number of NON-bias nodes in the layer
                             
            act: the activation function. It will not be useful/used, regardlessly, at the input layer.
                 1) 'tanh': the tanh function
                 2) 'logis': the logistic function
                 3) 'iden': the identity function
                 4) 'relu': the ReLU function 
        '''

        self.d = d   # the number of non-bias nodes

        self.act = eval('MyMath.' + act)   # the activation function, not useful/used at the input layer
        self.act_de = eval('MyMath.' + act + '_de')  # the derivative of the activation function, not useful/used at the input layer 
        
        # The following matrix/vectors are to be materalized by the NN-level code. Some are not useful for the input layer. 
        # Below, N' represents the minibatch size, \ell represents the index of this layer. 
        self.S = None       # N' x d^{(\ell)} matrix. Each row is the vector of the d signals, sent into the d nodes, by each sample. Not useful for the input layer. 
        self.X = None       # N' x (d^{(\ell)}+1) matrix. Each row is the vector of the d+1 outputs, sent out by the bias node and the d neurons, by each sample.
        self.Delta = None   # N' x d^{(\ell)} matrix. Each row is vector of delta = \partial E / \partial S, where E is the error. Not useful for the input layer
        self.G = None       # (d^{(\ell-1)}+1 ) x d^{(\ell)} matrix. The gradient of E over W.
        self.W = None       # (d^{(\ell-1)}+1 ) x d^{(\ell)} matrix. The weights of the edges coming into layer \ell.


In [9]:
import numpy as np
import pandas as pd


verbose = True

def main():
    passed_math_util = test_math_util()
    print("Passed Math!")
    passed_add_layer = test_add_layer()
    print("Passed Add Layer!")
    passed_init_weights = test_init_weights()
    print("Passed Init Weights!")

    # Check if weights are initializing to seed's values.
    # weight_rng = np.random.default_rng(2142) and weight_rng.uniform used to generate weights.
    passed_seeded_weights = test_seeded_weights() #Also, verifies if _init_weights and add_layer is working.
  
    
    if not passed_seeded_weights:
        print("Additional tests cannot be accurately performed without a set of seeded weights.\nUnder nn.NeuralNetwork._init_weights, please use weight_rng = np.random.default_rng(2142) and weight_rng.uniform used to generate weights.")
        print(f"Current results:\npassed_math_util: {passed_math_util}, passed_add_layer: {passed_add_layer}\n\
            passed_init_weights{passed_init_weights}, passed_seeded_weights: {passed_seeded_weights}")
        return
    print("Passed Seeded Weights!")
    (X_train,y_train,X_test,y_test) = loadData()
    print("Here we are!!")

def test_seeded_weights():
    d = 2
    k = 2
    passed = True

    # build the network
    nuts = NeuralNetwork()

    nuts.add_layer(d = d)  # input layer - 0
    nuts.add_layer(d = 5, act = 'relu')  # hidden layer - 1
    nuts.add_layer(d = k, act = 'logis')  # output layer

    nuts._init_weights()

    seed_weights = load_weights()
    for layer, seed_weight in zip(nuts.layers[1:],seed_weights):
        seed_weight = np.array(seed_weight)
        weight = np.array(layer.W)

        if (weight != seed_weight).any():
            if verbose:
                print(f"check_seeded_weights:\nExpected:\n{seed_weight}\nFound:\n{weight}")
            passed = False

    return passed

def test_init_weights():
    d = 10
    k = 8
    passed = True

    # build the network
    nuts = NeuralNetwork()

    nuts.add_layer(d = d)  # input layer - 0
    nuts.add_layer(d = 5, act = 'relu')  # hidden layer - 1
    nuts.add_layer(d = k, act = 'logis')  # output layer

    nuts._init_weights()

    #Check dimensionality of weights
    if nuts.layers[0].W != None:
        print("")

    shapes = [(11,5),(6,8)]
    for layer, dim in zip(nuts.layers[1:], shapes):
        if layer.W.shape != dim:
            if verbose:
                print(f"test_init_weights: Invalid dimensions of the instantiated weights. Expected {dim}, found {layer.W.shape}")
            passed = False

    return passed

def test_add_layer():
    # build the network
    nuts = NeuralNetwork()

    assert nuts.L == -1, f"After initialization, L = -1. Found L = {nuts.L}"

    passed = True

    nuts.add_layer(d = 5, act = 'logis')
    if nuts.L != 0:
        if verbose:
            print(f"test_add_layer: After adding a layer, L = 0. Found L = {nuts.L}")
        passed = False
    if len(nuts.layers) != 1:
        if verbose:
            print(f"test_add_layer: Failed to add layer to layers.")
        passed = False

    return passed

def test_math_util():
    passed = [test_tanh(), test_tanh_de(), test_logis(), test_logis_de(), test_iden(), test_iden_de(), test_relu(), test_relu_de()]
    if all(passed):
        return True
    else:
        print(f"test_math_util passed methods:\n\
tanh: {passed[0]}, tanh_de: {passed[1]}\n\
logis: {passed[2]}, logis_de: {passed[3]}\n\
iden: {passed[4]}, iden_de: {passed[5]}\n\
relu: {passed[6]}, relu_de: {passed[7]}")
    
        return False

def test_tanh():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.0,0.761594156,0.9640275801,-0.761594156]
    y_hat = MyMath.tanh(x)
    return _test_math_util(x,y,y_hat,"tanh")

def test_tanh_de():
    x = [0.0,1.0,2.0,-1.0]
    y = [1,0.419974,0.0706508,0.419974]
    y_hat = MyMath.tanh_de(x)
    return _test_math_util(x,y,y_hat,"tanh_de")

def test_logis():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.5,0.7310586,0.8807971,0.2689414]
    y_hat = MyMath.logis(x)
    return _test_math_util(x,y,y_hat,"logis")

def test_logis_de():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.25,0.196612,0.104994,0.196612]
    y_hat = MyMath.logis_de(x)
    return _test_math_util(x,y,y_hat,"logis_de")

def test_iden():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.0,1.0,2.0,-1.0]
    y_hat = MyMath.iden(x)
    return _test_math_util(x,y,y_hat,"iden")

def test_iden_de():
    x = [0.0,1.0,2.0,-1.0]
    y = [1,1,1,1]
    y_hat = MyMath.iden_de(x)
    return _test_math_util(x,y,y_hat,"iden_de")

def test_relu():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.0,1.0,2.0,0.0]
    y_hat = MyMath.relu(x)
    return _test_math_util(x,y,y_hat,"relu")

def test_relu_de():
    x = [0.0,1.0,2.0,-1.0]
    y = [0,1,1,0]
    y_hat = MyMath.relu_de(x)
    return _test_math_util(x,y,y_hat,"relu_de")

def _test_math_util(x,y,y_hat,name):

    passed = True

    if not _is_numpy_array(y_hat):
        if verbose:
            print(f"Incorrect return type. Expected type {np.ndarray}, but found {type(y_hat)}")
        passed = False

    for y, y_hat, x in zip(y, y_hat, x):
        if not inThreshold(y,y_hat,0.00001):
            if verbose:
                print(f"Incorrect {name} value, expected {y}, but found {y_hat} for x = {x}")
            passed = False
    return passed

def _is_numpy_array(x):
    return isinstance(x,np.ndarray)

def inThreshold(x1, x2, threshold=1.1):
    return abs(x1 - x2) < threshold

def saveAllZ(Z_list,file="output.npz"):
    np.savez(file,*Z_list)

def clearZ(file="output.npz"):
    f = open(file, "w")
    f.close()

def loadData(data_set='ionoshpere'):
    k = 10
    d = 784

    #Reads the files into pandas dataframes from the respective .csv files.
    path = 'MNIST'
    df_X_train = pd.read_csv(f'{path}/X_train.csv', header=None)
    df_y_train = pd.read_csv(f'{path}/y_train.csv', header=None)
    df_X_test = pd.read_csv(f'{path}/X_test.csv', header=None)
    df_y_test = pd.read_csv(f'{path}/y_test.csv', header=None)

    # save in numpy arrays
    X_train_raw = df_X_train.to_numpy()
    y_train_raw = df_y_train.to_numpy()
    X_test_raw = df_X_test.to_numpy()
    y_test_raw = df_y_test.to_numpy()

    # get training set size
    n_train = X_train_raw.shape[0]
    n_test = X_test_raw.shape[0]

    # normalize all features to [0,1]
    X_all = MyUtils.normalize_0_1(np.concatenate((X_train_raw, X_test_raw), axis=0))
    X_train = X_all[:n_train]
    X_test = X_all[n_train:]

    # convert each label into a 0-1 vector
    y_train = np.zeros((n_train, k))
    y_test = np.zeros((n_test, k))
    for i in range(n_train):
        y_train[i,int(y_train_raw[i])] = 1.0
    for i in range(n_test):
        y_test[i,int(y_test_raw[i])] = 1.0

    #Insure that the data correctly loaded in.
    assert X_train.shape == (60000, 784), "Incorrect input, expected (60000, 784), found " + X_train.shape
    assert y_train.shape == (60000, 10), "Incorrect input, expected (60000, 10), found " + y_train.shape
    assert X_test.shape  == (10000, 784), "Incorrect input, expected (10000, 784), found " + X_test.shape
    assert y_test.shape  == (10000, 10), "Incorrect input, expected (10000, 10), found " + y_test.shape

    return (X_train,y_train,X_test,y_test)

def load_weights(file="../seeded_weights.npz"):
    container = np.load(file)
    weight_list = [container[key] for key in container]
    return weight_list

if __name__ == '__main__':
    main()

Passed Math!
Passed Add Layer!
Passed Init Weights!
Passed Seeded Weights!
Here we are!!


TypeError: only integer scalar arrays can be converted to a scalar index

In [16]:


verbose = True

def main():
    passed_math_util = passed_add_layer = passed_init_weights = passed_seeded_weights = passed_test_fit = passed_test_weights = False

    passed_math_util = test_math_util()
    passed_add_layer = test_add_layer()
    passed_init_weights = test_init_weights()

    print(f"\n######### INITIAL RESULTS #########\npassed_math_util: {passed_math_util}, passed_add_layer: {passed_add_layer}, passed_init_weights: {passed_init_weights}")

    if not (passed_math_util and passed_add_layer and passed_init_weights):
        print("Stopping tester due to failed test of a fundamental functionality.\nPlease review the tester that failed.")
        return

    test_fit() #Checks to see if the fit method crashes from running a handful of iterations.

    train_and_save_weights() #Trains the weights and saves them to "weights.npz". Expected validation error is less than 0.04 or 4%.
    
    test_saved_weights() #Verifies the accuracy of the model's resulting weights
    
def test_fit():
    (X_train,y_train,X_test,y_test) = loadData()
    nuts = _createNN()
    nuts.fit(X_train, y_train, eta = 0.1, iterations = 5, SGD = True, mini_batch_size = 20)

def test_saved_weights(file = "weights.npz"):
    (X_train,y_train,X_test,y_test) = loadData()

    #loads weights into nuts
    npz_weights = load_weights(file=file)
    nuts = _createNN()
    _import_weights(npz_weights, nuts)

    train_error = nuts.error(X_train, y_train)
    test_error  = nuts.error(X_test,  y_test)

    print(f"######### TRAINING RESULTS - Model error #########\nTrain: {np.round(train_error, 4)}, Test: {np.round(test_error, 4)}")
    if test_error > 0.00001 and test_error < 0.05:
        print("test_saved_weights: SUCCESS!!!")
    else:
        print(f"test_saved_weights: Insufficient model accuracy. Expcected error less than 0.05 or 5%\nActual test error: {np.round(test_error, 4)}")
    if test_error <= 0.00001:
        print("test_saved_weights: Test error is suspiciously low. Please reevaluate your error method.")
    return False

def _import_weights(npz_weights, nuts):
    for ell in range(1, nuts.L+1):
        nuts.layers[ell].W = np.array(npz_weights[ell - 1])

def train_and_save_weights():
    (X_train,y_train,X_test,y_test) = loadData()
    nuts = _createNN()
    nuts.fit(X_train, y_train, eta = 0.1, iterations = 10000, SGD = True, mini_batch_size = 20)
    _save_weights(nuts)

def _save_weights(nuts):
    file = "weights.npz"
    clearZ(file=file)
    weights_list = []

    for ell in range(1, nuts.L+1):
        cur_layer = nuts.layers[ell]
        weights_list.append(cur_layer.W)

    saveAllZ(weights_list, file=file)

def _createNN(k = 10, d = 784):
    nuts = NeuralNetwork()
    nuts.add_layer(d = d)  # input layer - 0
    nuts.add_layer(d = 100, act = 'relu')  # hidden layer - 1
    nuts.add_layer(d = 30, act = 'relu')  # hiddent layer - 2
    nuts.add_layer(d = k, act = 'logis')  # output layer,    multi-class classification, #classes = k
    return nuts

def test_seeded_weights():
    d = 2
    k = 2
    passed = True

    # build the network
    nuts = NeuralNetwork()

    nuts.add_layer(d = d)  # input layer - 0
    nuts.add_layer(d = 5, act = 'relu')  # hidden layer - 1
    nuts.add_layer(d = k, act = 'logis')  # output layer

    nuts._init_weights()

    seed_weights = load_weights()
    for layer, seed_weight in zip(nuts.layers[1:],seed_weights):
        seed_weight = np.array(seed_weight)
        weight = np.array(layer.W)

        if (weight != seed_weight).any():
            if verbose:
                print(f"check_seeded_weights:\nExpected:\n{seed_weight}\nFound:\n{weight}")
            passed = False

    return passed

def test_init_weights():
    d = 10
    k = 8
    passed = True

    # build the network
    nuts = NeuralNetwork()

    nuts.add_layer(d = d)  # input layer - 0
    nuts.add_layer(d = 5, act = 'relu')  # hidden layer - 1
    nuts.add_layer(d = k, act = 'logis')  # output layer

    nuts._init_weights()

    #Check dimensionality of weights
    if nuts.layers[0].W != None:
        print("")

    shapes = [(11,5),(6,8)]
    for layer, dim in zip(nuts.layers[1:], shapes):
        if layer.W.shape != dim:
            if verbose:
                print(f"test_init_weights: Invalid dimensions of the instantiated weights. Expected {dim}, found {layer.W.shape}")
            passed = False

    return passed

def test_add_layer():
    # build the network
    nuts = NeuralNetwork()

    assert nuts.L == -1, f"After initialization, L = -1. Found L = {nuts.L}"

    passed = True

    nuts.add_layer(d = 5, act = 'logis')
    if nuts.L != 0:
        if verbose:
            print(f"test_add_layer: After adding a layer, L = 0. Found L = {nuts.L}")
        passed = False
    if len(nuts.layers) != 1:
        if verbose:
            print(f"test_add_layer: Failed to add layer to layers.")
        passed = False

    return passed

def test_math_util():
    passed = [test_tanh(), test_tanh_de(), test_logis(), test_logis_de(), test_iden(), test_iden_de(), test_relu(), test_relu_de()]
    if all(passed):
        return True
    else:
        print(f"test_math_util passed methods:\n\
tanh: {passed[0]}, tanh_de: {passed[1]}\n\
logis: {passed[2]}, logis_de: {passed[3]}\n\
iden: {passed[4]}, iden_de: {passed[5]}\n\
relu: {passed[6]}, relu_de: {passed[7]}")
    
        return False

def test_tanh():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.0,0.761594156,0.9640275801,-0.761594156]
    y_hat = MyMath.tanh(x)
    return _test_math_util(x,y,y_hat,"tanh")

def test_tanh_de():
    x = [0.0,1.0,2.0,-1.0]
    y = [1,0.419974,0.0706508,0.419974]
    y_hat = MyMath.tanh_de(x)
    return _test_math_util(x,y,y_hat,"tanh_de")

def test_logis():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.5,0.7310586,0.8807971,0.2689414]
    y_hat = MyMath.logis(x)
    return _test_math_util(x,y,y_hat,"logis")

def test_logis_de():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.25,0.196612,0.104994,0.196612]
    y_hat = MyMath.logis_de(x)
    return _test_math_util(x,y,y_hat,"logis_de")

def test_iden():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.0,1.0,2.0,-1.0]
    y_hat = MyMath.iden(x)
    return _test_math_util(x,y,y_hat,"iden")

def test_iden_de():
    x = [0.0,1.0,2.0,-1.0]
    y = [1,1,1,1]
    y_hat = MyMath.iden_de(x)
    return _test_math_util(x,y,y_hat,"iden_de")

def test_relu():
    x = [0.0,1.0,2.0,-1.0]
    y = [0.0,1.0,2.0,0.0]
    y_hat = MyMath.relu(x)
    return _test_math_util(x,y,y_hat,"relu")

def test_relu_de():
    x = [0.0,1.0,2.0,-1.0]
    y = [0,1,1,0]
    y_hat = MyMath.relu_de(x)
    return _test_math_util(x,y,y_hat,"relu_de")

def _test_math_util(x,y,y_hat,name):

    passed = True

    if not _is_numpy_array(y_hat):
        if verbose:
            print(f"Incorrect return type. Expected type {np.ndarray}, but found {type(y_hat)}")
        passed = False

    for y, y_hat, x in zip(y, y_hat, x):
        if not inThreshold(y,y_hat,0.00001):
            if verbose:
                print(f"Incorrect {name} value, expected {y}, but found {y_hat} for x = {x}")
            passed = False
    return passed

def _is_numpy_array(x):
    return isinstance(x,np.ndarray)

def inThreshold(x1, x2, threshold=1.1):
    return abs(x1 - x2) < threshold

def saveAllZ(Z_list,file="output.npz"):
    np.savez(file,*Z_list)

def clearZ(file="output.npz"):
    f = open(file, "w")
    f.close()

def loadData():
    k = 10
    d = 784

    #Reads the files into pandas dataframes from the respective .csv files.
    path = 'MNIST'
    df_X_train = pd.read_csv(f'{path}/X_train.csv', header=None)
    df_y_train = pd.read_csv(f'{path}/y_train.csv', header=None)
    df_X_test = pd.read_csv(f'{path}/X_test.csv', header=None)
    df_y_test = pd.read_csv(f'{path}/y_test.csv', header=None)

    # save in numpy arrays
    X_train_raw = df_X_train.to_numpy()
    y_train_raw = df_y_train.to_numpy()
    X_test_raw = df_X_test.to_numpy()
    y_test_raw = df_y_test.to_numpy()

    # get training set size
    n_train = X_train_raw.shape[0]
    n_test = X_test_raw.shape[0]

    # normalize all features to [0,1]
    X_all = MyUtils.normalize_0_1(np.concatenate((X_train_raw, X_test_raw), axis=0))
    X_train = X_all[:n_train]
    X_test = X_all[n_train:]

    # convert each label into a 0-1 vector
    y_train = np.zeros((n_train, k))
    y_test = np.zeros((n_test, k))
    for i in range(n_train):
        y_train[i,int(y_train_raw[i])] = 1.0
    for i in range(n_test):
        y_test[i,int(y_test_raw[i])] = 1.0

    #Insure that the data correctly loaded in.
    assert X_train.shape == (60000, 784), "Incorrect input, expected (60000, 784), found " + X_train.shape
    assert y_train.shape == (60000, 10), "Incorrect input, expected (60000, 10), found " + y_train.shape
    assert X_test.shape  == (10000, 784), "Incorrect input, expected (10000, 784), found " + X_test.shape
    assert y_test.shape  == (10000, 10), "Incorrect input, expected (10000, 10), found " + y_test.shape

    return (X_train,y_train,X_test,y_test)

def load_weights(file="seeded_weights.npz"):
    container = np.load(file)
    weight_list = [container[key] for key in container]
    return weight_list

if __name__ == '__main__':
    main()


######### INITIAL RESULTS #########
passed_math_util: True, passed_add_layer: True, passed_init_weights: True
######### TRAINING RESULTS - Model error #########
Train: [0.0281], Test: [0.0324]
test_saved_weights: SUCCESS!!!
