In [1]:
## Python Package Imports
import numpy as np
import pandas as pd
import pickle

## Custom Module Imports
from activation_functions.SoftMax import SoftMax
from activation_functions.ReLU import ReLU
from loss_functons.mean_square_error import mean_square_error
from activation_functions.LeakyReLU import LeakyReLU
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
class dense_layer:
    """
    Fully connected layer of a neural network.

    The layer stores a weight matrix ``W`` of shape (out_n, in_n) and a bias
    vector of shape (out_n,). Its forward pass computes
    ``a = activation(X W^T + b)``.

    Parameters
    ----------
    input_size : int
        Number of inputs per sample (in_n).
    num_neurons : int
        Number of neurons in this layer (out_n).
    activ_func : object
        Activation object; this class calls ``activ_func.forward(z)`` and
        ``activ_func.derivative(z)`` on the pre-activation matrix ``z``.
    """
    def __init__(self, input_size, num_neurons, activ_func):
        self.num_in_n = input_size
        self.num_out_n = num_neurons
        # Standard-normal initialisation scaled by 1/2.
        self.W = np.random.randn(self.num_out_n, self.num_in_n)/2
        self.bias = np.random.randn(self.num_out_n)/2
        self.activation_func = activ_func

    def batch_input(self, X):
        """
        Forward pass for one batch.

        Computes z = X W^T + b with shapes
        (batch_size, in_n) x (in_n, out_n) + (out_n,) = (batch_size, out_n),
        where the bias is broadcast over the rows, then applies the activation.

        The input, the pre-activation ``raw_output`` and the activation output
        are cached on the instance because ``backward`` needs them.

        Returns the activation output of shape (batch_size, out_n).
        """
        self.X = X
        self.batch_size = X.shape[0]
        self.raw_output = np.dot(self.X, self.W.T) + self.bias
        self.activation_output = self.activation_func.forward(self.raw_output)
        return self.activation_output

    def backward(self, error_matrix, learning_rate):
        """
        Backward pass for the batch most recently given to ``batch_input``.

        Given dC/da^(l) for this layer, applies one gradient-descent step to
        ``W`` and ``bias`` and returns dC/da^(l-1) for the preceding layer.

        C = cost function
        a^(l) = activation output of layer l
        z = X W^T + b

        Parameters
        ----------
        error_matrix : array of shape (batch_size, out_n)
            dC/da for this layer's activation output.
        learning_rate : float
            Step size of the gradient-descent update.

        Returns
        -------
        array of shape (batch_size, in_n)
            dC/dX, computed with the weights as they were before the update.
        """
        da_dz = self.activation_func.derivative(self.raw_output)  # (batch_size, out_n)
        dC_dz = error_matrix * da_dz                              # (batch_size, out_n)

        # Error propagated to the previous layer; uses the pre-update weights.
        dC_dX = np.matmul(dC_dz, self.W)                          # (batch_size, in_n)

        # Batch-averaged weight gradient: entry (j, k) is mean_i dC_dz[i, j] * X[i, k].
        dC_dw = np.matmul(dC_dz.T, self.X) / self.batch_size      # (out_n, in_n)

        # Batch-averaged bias gradient.
        dC_db = np.sum(dC_dz, axis=0) / self.batch_size           # (out_n,)

        self.W = self.W - (learning_rate * dC_dw)
        self.bias = self.bias - (learning_rate * dC_db)
        return dC_dX
    
class out_layer:
    """
    Output layer of a neural network: a single linear unit with no activation.

    Holds a 1-D weight vector ``W`` of length ``input_size`` and a bias of
    length 1. All gradients are delegated to ``loss_func``, which must provide
    ``dMSE_dX``, ``dMSE_dW`` and ``dMSE_db`` accepting the keywords
    ``X``, ``w``, ``b`` and ``y``.
    """
    def __init__(self, input_size, loss_func):
        self.num_in_n = input_size
        self.loss_func = loss_func
        self.num_out_n = 1
        # Weights are drawn before the bias; both are standard normal, halved.
        self.W = np.random.randn(self.num_in_n) / 2
        self.bias = np.random.randn(self.num_out_n) / 2

    def batch_input(self, X):
        """
        Forward pass: caches the batch and returns X W^T + bias.

        The bias (length 1) is broadcast across the result.
        """
        self.X = X
        self.batch_size = X.shape[0]
        weighted_sum = np.dot(self.X, self.W.T)
        self.raw_output = weighted_sum + self.bias
        return self.raw_output

    def backward(self, y_true, learning_rate):
        """
        Backward pass for the batch most recently given to ``batch_input``.

        Asks the loss function for the gradients with respect to the input,
        the weights and the bias (all evaluated at the current parameters),
        takes one gradient-descent step of size ``learning_rate``, and returns
        the gradient with respect to the input for the preceding layer.
        """
        grad_args = dict(X=self.X, w=self.W, b=self.bias, y=y_true)

        # All three gradients are evaluated before any parameter changes.
        grad_input = self.loss_func.dMSE_dX(**grad_args)
        grad_weights = self.loss_func.dMSE_dW(**grad_args)
        grad_bias = self.loss_func.dMSE_db(**grad_args)

        self.W = self.W - learning_rate * grad_weights
        self.bias = self.bias - learning_rate * grad_bias
        return grad_input
    

class simple_neural_network:
    """
    Feed-forward neural network stored as an ordered list of ``dense_layer``
    objects (``nn_array``).

    Parameters
    ----------
    input_size : int
        Number of features per input sample; used as the input width of the
        first layer added.
    """
    def __init__(self, input_size):
        self.nn_array = []
        self.input_size = input_size

    def add_layer(self, num_neurons, activ_func, scale_inputs=False):
        """
        Append a dense layer to the end of the network.

        The new layer's input width is the network's ``input_size`` for the
        first layer, and the previous layer's output width otherwise.

        num_neurons - number of neurons in the new layer
        activ_func - activation object applied to the outputs of this layer
        scale_inputs - accepted for call compatibility; currently has no effect
        """
        if self.nn_array:
            # dense_layer exposes its output width as `num_out_n`.
            prev_output_size = self.nn_array[-1].num_out_n
        else:
            prev_output_size = self.input_size
        self.nn_array.append(dense_layer(
            input_size=prev_output_size,
            num_neurons=num_neurons,
            activ_func=activ_func))

    def describe_network(self):
        """Print each layer object, first layer first."""
        for layer in self.nn_array:
            print(layer)

    def forward_pass(self, input_matrix):
        """
        Feed ``input_matrix`` through every layer in order and return the
        output of the last layer.
        """
        for layer in self.nn_array:
            input_matrix = layer.batch_input(input_matrix)
        return input_matrix

    def backward_pass(self, error_vector, learning_rate):
        """
        Back-propagate ``error_vector`` (dC/da of the last layer) from the last
        layer to the first. Each layer updates its own parameters with
        ``learning_rate`` and hands its input gradient to the layer before it.

        Returns None.
        """
        for layer in reversed(self.nn_array):
            error_vector = layer.backward(error_vector, learning_rate)

In [None]:
from sklearn.preprocessing import StandardScaler
# Load the diabetes data set and standardise features and target.
diabetes = load_diabetes()
scaler = StandardScaler()
scaler.fit(diabetes.data)
X_transformed = scaler.transform(diabetes.data)
# The same scaler object is re-fitted on the target, so from here on `scaler`
# holds the target statistics (what inverse-transforming predictions needs).
scaler.fit(diabetes.target.reshape(-1,1))
y_transformed = scaler.transform(diabetes.target.reshape(-1,1))
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y_transformed, test_size=0.2, random_state=42, shuffle=True)

# Network: 10 inputs -> 8 -> 32 -> 64 -> 32 -> 8 -> 1, LeakyReLU(0.01) on every layer.
nn = simple_neural_network(10)
nn.add_layer(8, LeakyReLU(0.01))
nn.add_layer(32, LeakyReLU(0.01), scale_inputs=True)
nn.add_layer(64, LeakyReLU(0.01))
nn.add_layer(32, LeakyReLU(0.01), scale_inputs=True)
nn.add_layer(8, LeakyReLU(0.01), scale_inputs=True)
nn.add_layer(1, LeakyReLU(0.01))

# Mini-batches of 10 rows; the last batch may be shorter.
mse_func = mean_square_error()
X_train_batches = [X_train[i: i+10] for i in range(0, len(X_train), 10)]
y_train_batches = [y_train[i:i+10] for i in range(0, len(y_train), 10)]

num_epochs = 100
learning_rate = 0.01  # gradient-descent step size; tune as needed
for epoch in range(num_epochs):
    # One epoch = one pass over every mini-batch. The batch loop has its own
    # loop variables so it no longer overwrites the epoch counter.
    for X_train_batch, y_train_batch in zip(X_train_batches, y_train_batches):
        y_pred_batch = nn.forward_pass(X_train_batch)
        # backward_pass requires the learning rate as its second argument.
        nn.backward_pass(mse_func.derivative(y_train_batch, y_pred_batch), learning_rate)
    # Report the loss on the full training set after each epoch.
    y_true = y_train
    y_pred = nn.forward_pass(X_train)
    print('MSE Loss:', mse_func.compute(y_true=y_true, y_pred=y_pred))

# scaler.inverse_transform(y_pred.reshape(-1,1))
# print('MSE Loss:', mse_func.compute(y_true=y_true, y_pred=y_pred))
# print('dC/da', mse_func.derivative(y_true, y_pred))


In [None]:
# Testing single instance
from sklearn.preprocessing import StandardScaler
# Standardise features and target; the one scaler is fitted to the features
# first and then re-fitted to the target, so it ends up holding target statistics.
diabetes = load_diabetes()
scaler = StandardScaler()
X_transformed = scaler.fit_transform(diabetes.data)
y_transformed = scaler.fit_transform(diabetes.target.reshape(-1, 1))
X_train, X_test, y_train, y_test = train_test_split(
    X_transformed,
    y_transformed,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)

# Fresh network: (layer width, scale_inputs flag) per layer, each with its own LeakyReLU(0.01).
nn = simple_neural_network(10)
for width, scaled in [(8, False), (32, True), (64, False), (32, True), (8, True), (1, False)]:
    nn.add_layer(width, LeakyReLU(0.01), scale_inputs=scaled)

# Slice the training set into consecutive batches of 10 rows.
mse_func = mean_square_error()
X_train_batches = [X_train[start:start + 10] for start in range(0, len(X_train), 10)]
y_train_batches = [y_train[start:start + 10] for start in range(0, len(y_train), 10)]

# Only the first batch is used by the single-batch experiment cells below.
X_train_batch, y_train_batch = X_train_batches[0], y_train_batches[0]

In [None]:
# Loss history for the manual training-step cell; re-running this cell empties it.
stored_mse = list()

In [None]:
# One manual training step on the fixed batch; re-run this cell to take another step.
y_pred_batch = nn.forward_pass(X_train_batch)
dC_da_final_layer = mse_func.derivative(y_train_batch, y_pred_batch)
loss = mse_func.compute(y_true=y_train_batch, y_pred=y_pred_batch)
stored_mse.append(loss)
print('Max Prediction:', np.max(np.abs(y_pred_batch)))
# Report the values computed above instead of evaluating the loss and its gradient a second time.
print('MSE Loss:', loss)
print('dC/da', dC_da_final_layer)
nn.backward_pass(dC_da_final_layer, learning_rate=1)

In [None]:
import matplotlib.pyplot as plt
indices = list(range(len(y_pred_batch)))

# Plot the predictions and the targets for the batch against sample index.
# Each curve is labelled so the legend tells them apart.
plt.plot(indices, y_pred_batch, marker='o', linestyle='-', label='Prediction')
plt.plot(indices, y_train_batch, marker='o', linestyle='-', label='Ground truth')
# Add labels, a title and the legend
plt.xlabel('Index')
plt.ylabel('y_value')
plt.title('Predictions vs Ground Truth')
plt.legend()

# Display the plot
plt.show()

In [None]:
# Display the loss gradient for the current batch (the same call whose result the
# training-step cell prints as 'dC/da'). Arguments are passed as (targets, predictions).
mse_func.derivative(y_train_batch, y_pred_batch)

In [None]:
import matplotlib.pyplot as plt
# x positions: one per recorded loss value
indices = [*range(len(stored_mse))]

# Loss history as a solid line with circle markers ('o-')
plt.plot(indices, stored_mse, 'o-')

# Axis labels and title
plt.xlabel('Index')
plt.ylabel('Loss')
plt.title('Loss Over Each Index')

# Render the figure
plt.show()

In [6]:
import sympy as sym
# Build the comma-separated names "x0, x1, ..., x9" and turn them into sympy symbols.
sym_str = ", ".join(map("x{}".format, range(10)))
print(sym_str)
sym_ary = sym.symbols(sym_str)
print(sym_ary)

x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9)


In [15]:
# Small hand-checkable example: 2 samples, 3 features.
X_temp = np.array([[1, 1, 1],
                   [2, 2, 2]])
w_temp = np.array([1, 10, 100])
y_temp = np.array([100, 200])

# Linear prediction X.w for each sample, then the residual against y.
pred_temp = np.matmul(X_temp, w_temp)
print(pred_temp)
residual_temp = pred_temp - y_temp
print(residual_temp)
# Outer product of residuals with the weights: one row per sample.
outer_temp = np.outer(residual_temp, w_temp)
print(outer_temp)
# Column sums divided by the number of samples (mean over the batch).
print(np.sum(outer_temp, axis=0) / X_temp.shape[0])

[111 222]
[11 22]
[[  11  110 1100]
 [  22  220 2200]]
[  16.5  165.  1650. ]


In [16]:
# Number of rows (samples) in X_temp, i.e. the divisor used for the batch average above.
X_temp.shape[0]

2