In [2]:
## Python Package Imports
import numpy as np
import pandas as pd
import pickle

## Custom Module Imports
from activation_functions.SoftMax import SoftMax
from activation_functions.ReLU import ReLU
from loss_functons.mean_square_error import mean_square_error
from activation_functions.LeakyReLU import LeakyReLU
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

In [2]:

class nn_layer:
    """
    Represents a weight matrix (rows, cols) = (num_neurons, input_size)
    num_neurons is the number of neurons we wish to put in this layer
    input_size is the fixed value defined by the last layer's outputs

    The relationship between input size and number of neurons for multiple layers is ---
    input_size = num_neurons_prev
    input_size_next/output_size = num_neurons
    ... etc
    """
    def __init__(self, input_size, num_neurons, activ_func):
        self.num_in_n = input_size
        self.num_out_n = num_neurons
        # self.weight_matrix = np.array([np.random.rand(input_size) for _ in range(num_neurons)])
        # Above line has been upgraded to line below
        self.weight_matrix = np.random.randn(self.num_out_n, self.num_in_n)
        self.bias = np.random.rand(self.num_out_n)
        self.activation_func = activ_func

    def batch_input(self, input_matrix):
        self.input_matrix = input_matrix
        self.batch_size = input_matrix.shape[0]
        """
        Returns the matrix product [input_matrix] * [weight_matrix]^T of dimensions
        (batch_size, num_in_neurons) * (num_in_neurons, num_out_neurons) = (batch_size, num_out_neurons)
        Where the output columns of the matrix are the output of the i^{th} layer of neurons

        
        (batch_size, num_out_neurons) + (num_out_neurons) is XW^T + bias, where the bias is added row-wise (to each row/neuron layer)
        """     
        self.raw_output = np.dot(self.input_matrix, self.weight_matrix.T) + self.bias_vector
        self.activation_output = self.activation_func.forward(self.raw_output)
        return self.activation_output
    
    def backward(self, error_vector):
        """
        Given the error vector dC/da^(l), returns the new error vector for the next layer, dC/da^(l-1)
        C = cost func
        a^(l) = vector of activation functions at layer l, dim(a^(l))=num_neurons
        z = w*x + b

        Individual parials:
        dC/da_i = error_vector_i = (1)
        da_i/dz_i = self.activation_func.derivative(raw_output[:, i]) = (batch_size,)
        dz_i/dw_ij = (X_1j, X_2j, ..., X_num_inputsj) = self.input_matrix[:, j] = (batch_size,)
        
        dC/da = error_vector = (1,)
        da/dz = a_prime = self.activation_func.derivative(raw_output) = (batch_size, num_out_neurons)
        dz/dw = z_prime = self.input_matrix = (batch_size, num_in_neurons)

        np.outer()

        col_avg ( dC/da * da/dz * dz/dw ) = Grad = (num_out_n)
        """

        eta = 0.01
        self.num_in_n
        self.num_out_n
        self.batch_size
        dC_da_1 = error_vector # derivative of cost wrt activation function at current layer, a vector indicating the change in cost at this 
        da_dz = self.activation_func.derivative(self.raw_output) # (batch_size, num_in_n)
        dz_dw = self.input_matrix # (batch_size, num_out_n)

        # Below computes tensor dot product along specified axes, here we compute the dot product of tensors along (axis 0,axis 0), then sum along the axis.
        # Note these axis have to be the same length
        # Description of np.tensordot
        # axes=0 gives outer product
        # axes=1 gives inner product
        # axes=2 gives tensor contraction
        da_dw = np.tensordot(dz_dw, da_dz, axes=(0,0)) / self.batch_size  # (num_out_n, num_in_n) = dim(W)
        dC_dw = da_dw * error_vector 
        
        
        da_da = np.matmul(da_dz, self.weight_matrix.T) # (batch_size, num_out_n)
        # sum average gradient across all batches
        dC_da_0 = np.sum(da_da, axis=0) * (dC_da_1 / self.batch_size) # (num_out_n)
        self.weight_matrix = self.weight_matrix - eta * dC_dw
        return dC_da_0
    

class simple_neural_network:
    """
    Represents a neural network as an array of 'nn_layer' objects
    """
    def __init__(self, input_size, loss_func):
        self.nn_array = []
        self.input_size = input_size
        self.loss_func = loss_func


    def add_layer(self, num_neurons, activ_func):
        """
        New layer must have input size corresponding to previous layer's output size
        num_neurons - is the number of neurons in the current layer
        activ_func - is the activation function that should be applied to the outputs of this layer
        """
        if(len(self.nn_array) == 0):
            self.nn_array.append(nn_layer(self.input_size, num_neurons, activ_func))
        else:
            prev_output_size = self.nn_array[-1].weight_matrix.shape[0]
            self.nn_array.append(nn_layer(
                input_size = prev_output_size, 
                num_neurons = num_neurons, 
                activ_func=activ_func))


    def describe_network(self):
        # weight matrix shape is (num_neurons, input_size)
        for layer in self.nn_array:
            print(layer)

    def forward_pass(self, input_matrix):
        for i in range(len(self.nn_array)):
            layer = self.nn_array[i]
            input_matrix = layer.batch_input(input_matrix)    
        return input_matrix
    
    def backward_pass(self, error_vector):
        for i in range(len(self.nn_array)-1, -1, -1):
            layer = self.nn_array[i]
            error_vector = layer.backwards(error_vector)

In [9]:
np.reshape(np.ravel(np.outer(np.array([1,1,1]), np.array([1,0,0,0]))), (3,4))

np.reshape(2, 3, 2)

array([[1, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 0, 0, 0]])

In [51]:
t1 = np.array([[1,2,3],[10,10,10],[1,2,3],[1,2,3],[1,2,3]])
t2 = np.array([[1,2], [0,0], [1,2], [1,2], [1,2]])

print(t1.shape)
print(t2.shape)
t3 = np.array([np.outer(x,y) for x,y in zip(t1, t2)])
#Below computes tensor dot product along specified axes, here we compute the dot product of tensors along (t1 axis 0, t2 axis 0), then sum along the axis.
# np.tensordot discrepency
# axes=0 gives outer product
# axes=1 gives inner product
# axes=2 gives tensor contraction
np.tensordot(t1, t2, axes=(0,0)) 

# v1 = np.array([1,2,3,4,5])
# v2 = np.array([1,1,1,1,1])
# np.tensordot(v1,v2)


(5, 3)
(5, 2)


array([[ 4,  8],
       [ 8, 16],
       [12, 24]])

In [3]:
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(diabetes.data, diabetes.target, test_size=0.2, random_state=42, shuffle=True)

nn = simple_neural_network(10, loss_func=mean_square_error())
nn.add_layer(20, LeakyReLU(0.01))
nn.add_layer(10, LeakyReLU(0.01))
nn.add_layer(1, LeakyReLU(0.01))
out_mat = nn.forward_pass(X_train)

# batch 1
mse_func = mean_square_error()
X_b1 = out_mat.T[0][:10]
y_b1 = y_test[:10]




In [4]:
mse_func.derivative(y_b1, X_b1)

-311.47141262434855

In [34]:
X_testing = np.random.randn(5,3)
relu_func = ReLU()
print(X_testing)
relu_func.forward(X_testing)

[[ 1.21852714  0.21872944  0.71768446]
 [-0.28915268 -0.73782518 -0.13573518]
 [ 1.82579171  0.23179451  0.22917273]
 [-2.55440273  1.35334641  0.66002298]
 [-1.99393952  1.07318724 -0.46297956]]


array([[1.21852714, 0.21872944, 0.71768446],
       [0.        , 0.        , 0.        ],
       [1.82579171, 0.23179451, 0.22917273],
       [0.        , 1.35334641, 0.66002298],
       [0.        , 1.07318724, 0.        ]])

In [32]:
np.max(X_testing, axis=0, keepdims=True)

array([[1.50213451, 0.41024829, 0.7674119 ]])

In [29]:
np.sum(X_testing - np.max(X_testing, axis=0, keepdims=True), axis=0)

array([-2.80409741, -7.32605188, -3.5663163 ])

In [33]:
sftmx = SoftMax()
sftmx.forward(X_testing)


array([[0.36939791, 0.108236  , 0.52236608],
       [0.27441081, 0.20518464, 0.52040455],
       [0.56091167, 0.32022445, 0.11886388],
       [0.4025412 , 0.38272902, 0.21472978],
       [0.62157997, 0.10690752, 0.27151251]])

In [21]:
X.shape[0]

array([[ 0.1001067 , -1.36177439, -0.41402302],
       [-1.0133265 , -0.14221217, -0.18116488],
       [-0.26464783,  0.70709005, -0.90289345],
       [-0.59329777, -0.57996122,  0.21845546],
       [-0.53239851,  1.48729854, -1.19441311]])

In [6]:
# Code Structure

# Module: Activation Functions

# Module: Neural Networks
# nn_layer
#   function backwards
#   --> inputs are (loss matrix, i.e. gradient for each neuron)
#   --> gradient of loss function d/dx activ_func(x)
#   --> update weight x_i = d/dx_i activ_func(x) for each weight in a neuron
#   --> 

# nn_full_simple
# figure out how to numerical differentiation

In [7]:
mse_func = mean_square_error()
mse = mse_func.compute(np.ones(5), np.zeros(5))
mse_deriv = mse_func.derivative(np.ones(5), np.zeros(5))

print(mse, mse_deriv)

1.0 -2.0
