## Creating a SoftMax Neural Net from Scratch

In [35]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plot 
import seaborn 
import tensorflow as tf

In [36]:
# Load MNIST through the Keras dataset helper and cast all four arrays to int.
# NOTE(review): presumably the cast is there to move off the loader's native
# dtype before doing arithmetic on the pixels/labels - confirm.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, y_train, x_test, y_test = (
    split.astype('int') for split in (x_train, y_train, x_test, y_test)
)

In [26]:
class Neural_Net_Softmax():
    """Fully connected feed-forward network with a softmax output layer.

    Data is laid out column-wise: an input batch has shape
    ``(input_features, n_examples)`` and every weight matrix has shape
    ``(layer_output_dim, layer_input_dim)`` so a layer computes ``W @ A + B``.

    Typical use::

        net = Neural_Net_Softmax(784, 10)
        net.init_weights([64, 32], 'relu', init_method='he')
        cost = net.forward_prop(x_flat, one_hot_y)
    """

    # Hidden-layer activations for which a ``_<name>_fp`` helper exists below.
    _SUPPORTED_ACTIVATIONS = ('relu', 'sigmoid')

    # Weight initialisation schemes understood by ``init_weights``.
    _SUPPORTED_INIT_METHODS = ('gaussian', 'xavier', 'he')

    def __init__(self, input_features, output_features):
        """Store the input and output sizes and create empty caches.

        Args:
            input_features: number of rows in an input batch.
            output_features: number of classes produced by the softmax layer.
        """
        self.input_dimension = input_features
        self.output_dimensions = output_features

        # Initialise the dictionaries here so we can't accidentally store
        # values we don't need if we change the input sizes.
        self.forward_prop_dict = {}
        self.back_prop_dict = {}

        # Filled in by create_layers / init_weights; defined here so that
        # calling forward_prop too early fails with a clear message.
        self.layer_sizes = []
        self.layer_types = []
        self.weights = {}

    def create_layers(self, array_of_mid_layer_sizes, activation_functions='relu'):
        """Record the hidden-layer sizes and the activation used by each one.

        Args:
            array_of_mid_layer_sizes: sequence with one size per hidden layer.
            activation_functions: either a single activation name, which is
                applied to every hidden layer, or a sequence with one name
                per hidden layer.

        Raises:
            ValueError: if the number of activations does not match the number
                of hidden layers, or if a name is not a supported activation.
        """
        sizes = list(array_of_mid_layer_sizes)

        # For layer type, we can either set all layers to one type or have an
        # array of layers passed.
        if isinstance(activation_functions, str):
            types = [activation_functions] * len(sizes)
        else:
            types = list(activation_functions)

        if len(types) != len(sizes):
            raise ValueError(
                f'Expected {len(sizes)} activation functions, got {len(types)}'
            )
        unsupported = [name for name in types if name not in self._SUPPORTED_ACTIVATIONS]
        if unsupported:
            raise ValueError(
                f'Unsupported activation functions {unsupported}; '
                f'choose from {self._SUPPORTED_ACTIVATIONS}'
            )

        self.layer_sizes = sizes
        self.layer_types = types

    def init_weights(self, array_of_mid_layer_sizes, array_of_mid_layer_activations, init_method='gaussian'):
        """Create the weight matrices and bias vectors for every layer.

        The layer sizes and activations are recorded through ``create_layers``
        so that ``forward_prop`` can be called straight afterwards. Any weights
        from a previous call are discarded.

        Args:
            array_of_mid_layer_sizes: sequence with one size per hidden layer.
            array_of_mid_layer_activations: activation name, or one name per
                hidden layer (see ``create_layers``).
            init_method: ``'gaussian'`` (standard normal weights and biases),
                ``'xavier'`` (uniform within ``+/- sqrt(6 / (fan_in + fan_out))``)
                or ``'he'`` (normal scaled by ``sqrt(2 / fan_in)`` with zero
                biases).

        Returns:
            The dict of parameters, keyed ``'W1'``, ``'B1'``, ``'W2'``, ... where
            the highest index belongs to the softmax output layer.

        Raises:
            ValueError: if ``init_method`` is not recognised, or if the layer
                description is rejected by ``create_layers``.
        """
        if init_method not in self._SUPPORTED_INIT_METHODS:
            raise ValueError(
                f'Unknown init_method {init_method!r}; '
                f'choose from {self._SUPPORTED_INIT_METHODS}'
            )

        self.create_layers(array_of_mid_layer_sizes, array_of_mid_layer_activations)

        self.weights = {}
        all_layers = [self.input_dimension] + self.layer_sizes + [self.output_dimensions]

        # Walk over consecutive (input, output) size pairs; layers are 1-indexed.
        for layer, (input_dim, output_dim) in enumerate(zip(all_layers, all_layers[1:]), start=1):
            if init_method == 'gaussian':
                w = np.random.randn(output_dim, input_dim)
                b = np.random.randn(output_dim, 1)
            elif init_method == 'xavier':
                w_limit = np.sqrt(6 / (output_dim + input_dim))
                b_limit = np.sqrt(6 / (output_dim + 1))
                w = np.random.uniform(-w_limit, w_limit, [output_dim, input_dim])
                b = np.random.uniform(-b_limit, b_limit, [output_dim, 1])
            else:  # 'he'
                w = np.random.randn(output_dim, input_dim) * np.sqrt(2 / input_dim)
                b = np.zeros((output_dim, 1))

            self.weights[f'W{layer}'] = w
            self.weights[f'B{layer}'] = b

        return self.weights

    def forward_prop(self, x_flat, one_hot_y, regularisation = False, gradient_smoother = False):
        """Run a forward pass and return the mean cross-entropy cost.

        Every layer's pre-activation ``Z{k}`` and activation ``A{k}`` is cached
        in ``self.forward_prop_dict`` (``A0`` is the input itself).

        Args:
            x_flat: 2-D input batch of shape ``(input_features, n_examples)``.
            one_hot_y: one-hot labels of shape ``(output_features, n_examples)``.
            regularisation: accepted for interface compatibility; currently
                not applied to the cost.
            gradient_smoother: accepted for interface compatibility; currently
                unused.

        Returns:
            The cross-entropy cost averaged over the examples, as a float.

        Raises:
            ValueError: if the weights have not been initialised, or the stored
                activations do not match the number of hidden layers.
        """
        layers = len(self.weights) // 2  # one W and one B per layer
        if layers == 0:
            raise ValueError('No weights found; call init_weights before forward_prop')
        if len(self.layer_types) != layers - 1:
            raise ValueError(
                f'{layers - 1} hidden layers but {len(self.layer_types)} activation functions'
            )

        self.forward_prop_dict['A0'] = x_flat  # This allows us to include x_train in the loop.

        # First, do all the layers up to the softmax layer, which has a
        # different activation function.
        for key, activation_function in zip(range(1, layers), self.layer_types):
            # Resolve the helper by name, e.g. 'relu' -> self._relu_fp.
            activation = getattr(self, f'_{activation_function}_fp')
            z, a = activation(
                self.forward_prop_dict[f'A{key - 1}'],
                self.weights[f'W{key}'],
                self.weights[f'B{key}'],
            )
            self.forward_prop_dict[f'Z{key}'] = z
            self.forward_prop_dict[f'A{key}'] = a

        # Then do the final (softmax) layer.
        z_out, a_out = self._last_leg_softmax(
            self.forward_prop_dict[f'A{layers - 1}'],
            self.weights[f'W{layers}'],
            self.weights[f'B{layers}'],
        )
        self.forward_prop_dict[f'Z{layers}'] = z_out
        self.forward_prop_dict[f'A{layers}'] = a_out

        # Cross-entropy cost, computed from log-softmax directly so that a
        # probability that underflows to 0 never reaches np.log.
        shifted = z_out - np.max(z_out, axis=0, keepdims=True)
        log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=0, keepdims=True))
        cost = -np.sum(np.asarray(one_hot_y) * log_probs) / log_probs.shape[1]
        return float(cost)

    # Helper functions defining sigmoid, relu and softmax activation functions.
    # They are static so they work both as self._name(...) and
    # Neural_Net_Softmax._name(...).
    @staticmethod
    def _sigmoid(x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def _sigmoid_fp(prev_a, W, B):
        """Affine step followed by sigmoid; returns ``(Z, A)``."""
        Z = W @ prev_a + B
        return Z, Neural_Net_Softmax._sigmoid(Z)

    @staticmethod
    def _relu(x):
        """Element-wise ``max(0, x)``; works on scalars and arrays."""
        return np.maximum(0, x)

    @staticmethod
    def _relu_fp(prev_a, W, B):
        """Affine step followed by ReLU; returns ``(Z, A)``."""
        Z = W @ prev_a + B
        return Z, Neural_Net_Softmax._relu(Z)

    @staticmethod
    def _last_leg_softmax(previous_a, W, B):
        """Affine step followed by a column-wise softmax; returns ``(Z, A)``."""
        Z = W @ previous_a + B
        # Subtracting each column's maximum leaves the softmax unchanged but
        # keeps np.exp from overflowing on large logits.
        A = np.exp(Z - np.max(Z, axis=0, keepdims=True))
        A = A / np.sum(A, axis=0, keepdims=True)
        return Z, A

In [43]:
def create_layers1(array_of_mid_layer_sizes, activation_functions = 'relu'):
    """Pair the hidden-layer sizes with the activation to use on each layer.

    Returns a ``(layer_sizes, layer_types)`` tuple. ``layer_sizes`` is the
    argument handed back unchanged. ``layer_types`` is a list repeating the
    activation once per layer when ``activation_functions`` is ``'relu'`` or
    ``'sigmoid'``; any other value is passed through as given.
    """
    # A single recognised name applies to every layer; anything else is
    # taken to be the caller's own per-layer description.
    applies_to_all = activation_functions in ('relu', 'sigmoid')
    per_layer = (
        [activation_functions] * len(array_of_mid_layer_sizes)
        if applies_to_all
        else activation_functions
    )
    return array_of_mid_layer_sizes, per_layer

In [51]:
# Fetch the function object by its name from the current namespace mapping.
function_a = locals()["create_layers1"]

In [52]:
# Call the function obtained through the name lookup; the cell output is the
# (layer sizes, layer types) tuple it returns.
function_a([1,2,3],'relu')

([1, 2, 3], ['relu', 'relu', 'relu'])

In [47]:
# Print each of the first three layer indices next to its activation name.
layers_test = create_layers1([1, 2, 3], 'relu')
for key, layers in enumerate(layers_test[1][:3]):
    print(f'{key} {layers}')

0 relu
1 relu
2 relu
