<a href="https://colab.research.google.com/github/flanaganc04/My-own-Neural-Net/blob/main/Digit_Recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Packages

In [1]:
from google.colab import drive
import os
import datetime
# import matplotlib as mpl
# import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import math

# Mount Google Drive

In [2]:
# Attach Google Drive under /content/drive so later cells can read files from it.
drive.mount("/content/drive")

Mounted at /content/drive


# Functions

In [3]:
def hiddenLayer(neurons: int, matrix: np.ndarray, rng: "np.random.Generator | None" = None) -> "tuple[np.ndarray, np.ndarray]":
    """
    Build a randomly initialised weight matrix and apply it to ``matrix``.

    A weight matrix of shape ``(neurons, matrix.shape[1])`` is drawn uniformly
    from [-1, 1) and the input is multiplied by its transpose. No bias and no
    activation function are applied here.

    Parameters
    ----------
    neurons : int
        Number of neurons in the layer (number of output columns).
    matrix : np.ndarray
        2-D input data of shape (n_samples, n_features), e.g. training or
        testing data, or the output of a previous layer.
    rng : np.random.Generator, optional
        Random generator used to draw the weights; pass a seeded generator
        (``np.random.default_rng(seed)``) for reproducible initialisation.
        When omitted, NumPy's global random state is used.

    Returns
    -------
    hiddenMatrix : np.ndarray
        ``matrix @ weightMatrix.T``, shape (n_samples, neurons).
    weightMatrix : np.ndarray
        The weights that were drawn, shape (neurons, n_features); returned so
        they can be reused later in backpropagation.
    """
    weight_shape = (neurons, matrix.shape[1])
    # Draw the weights uniformly from [-1, 1).
    if rng is None:
        weightMatrix = np.random.uniform(low=-1.0, high=1.0, size=weight_shape)
    else:
        weightMatrix = rng.uniform(low=-1.0, high=1.0, size=weight_shape)
    # One row per sample, one column per neuron.
    hiddenMatrix = matrix @ weightMatrix.T
    return hiddenMatrix, weightMatrix

In [4]:
def softMax(matrix: np.ndarray, in_place: bool = True) -> np.ndarray:
    """
    Apply the softmax function along the last axis (row-wise for 2-D input).

    For each row, ``p_j = exp(x_j) / sum_k exp(x_k)``. The row maximum is
    subtracted before exponentiating; this leaves the result unchanged but
    keeps ``exp`` from overflowing on large inputs.

    Parameters
    ----------
    matrix : np.ndarray
        Array to normalise (e.g., hidden layer or output layer activations).
    in_place : bool, default True
        When True and ``matrix`` is a floating-point array, the result is
        written back into ``matrix`` and that same object is returned. This is
        the function's historical behaviour and is kept as the default for
        backward compatibility. Pass False to leave the input untouched and
        get a new array instead.

    Returns
    -------
    np.ndarray
        The softmax-normalised values. For non-floating input (e.g. integers)
        a new float array is always returned, because the probabilities cannot
        be stored in the original dtype.
    """
    values = np.asarray(matrix)
    if values.size == 0:
        # Nothing to normalise; avoid reducing over an empty axis.
        return values if in_place else values.copy()

    # Shift so the largest entry in each row is 0 (numerical stability).
    shifted = values - np.max(values, axis=-1, keepdims=True)
    exponentials = np.exp(shifted)
    probabilities = exponentials / np.sum(exponentials, axis=-1, keepdims=True)

    can_overwrite = isinstance(matrix, np.ndarray) and np.issubdtype(matrix.dtype, np.floating)
    if in_place and can_overwrite:
        matrix[...] = probabilities
        return matrix
    return probabilities

In [None]:
def ForwardProp():
    """
    Placeholder for a forward-propagation routine.

    The definition had no body yet; it is kept as an explicit stub so the
    cell parses and a premature call fails loudly.

    Raises
    ------
    NotImplementedError
        Always, until the routine is implemented.
    """
    raise NotImplementedError("ForwardProp is not implemented yet")

In [5]:
def OneHotEncoder(matrix: np.ndarray, num_classes: "int | None" = None) -> np.ndarray:
    """
    Convert an array of integer class labels into a one-hot encoded matrix.

    Parameters
    ----------
    matrix : np.ndarray
        Input array of class labels (shape: (n_samples,) or (n_samples, 1)).
        Values are cast to ``int`` and flattened before encoding.
    num_classes : int, optional
        Width of the encoding. When omitted it is inferred as
        ``max(label) + 1``, which is only correct if the highest class
        actually occurs in ``matrix``; pass it explicitly to get a fixed
        width regardless of which labels are present.

    Returns
    -------
    one_hot_matrix : np.ndarray
        One-hot encoded float array of shape (n_samples, num_classes).

    Raises
    ------
    ValueError
        If a label is negative, if a label is >= ``num_classes``, or if
        ``matrix`` is empty and ``num_classes`` was not given.
    """
    # Ensure integer dtype (in case it's float or object from pandas).
    labels = np.asarray(matrix).astype(int).ravel()

    # Negative labels would silently index from the end of the identity matrix.
    if labels.size and labels.min() < 0:
        raise ValueError("class labels must be non-negative integers")

    if num_classes is None:
        if labels.size == 0:
            raise ValueError("cannot infer num_classes from an empty label array")
        num_classes = int(labels.max()) + 1
    elif labels.size and labels.max() >= num_classes:
        raise ValueError(
            f"label {int(labels.max())} is out of range for num_classes={num_classes}"
        )

    # Row i of the identity matrix is the one-hot vector for class i.
    one_hot_matrix = np.eye(num_classes)[labels]
    return one_hot_matrix

# Read Data

In [6]:
# Load the train/test CSVs from the mounted Drive folder.
DATA_DIR = '/content/drive/MyDrive/Digit Recognizer'
# Feature columns are multiplied by 1/255 (presumably 8-bit pixel intensities
# being rescaled to [0, 1] — confirm against the CSV contents).
PIXEL_SCALE = 1 / 255

test_data = pd.read_csv(os.path.join(DATA_DIR, 'mnist_test.csv'))
train_data = pd.read_csv(os.path.join(DATA_DIR, 'mnist_train.csv'))

# Column 0 is the target; every remaining column is a feature.
X_train = train_data.iloc[:, 1:]  # Features
X_train_np = X_train.to_numpy() * PIXEL_SCALE

Y_train = train_data.iloc[:, 0:1]   # Target variable
Y_train_np = Y_train.to_numpy()

# The test split is sliced exactly like the training split. It previously
# started at row 1, which silently discarded the first test sample.
X_test = test_data.iloc[:, 1:]  # Features
X_test_np = X_test.to_numpy() * PIXEL_SCALE

Y_test = test_data.iloc[:, 0:1]   # Target variable
Y_test_np = Y_test.to_numpy()

# Features and labels must stay row-aligned.
assert X_train_np.shape[0] == Y_train_np.shape[0]
assert X_test_np.shape[0] == Y_test_np.shape[0]

# Forward Propagation 1

In [7]:
# Layer 1: 25 neurons fed by the scaled training features.
layer1, weights1 = hiddenLayer(neurons=25, matrix=X_train_np)
# softMax writes its result back into its argument, so after this call
# `layer1` and `softLayer1` refer to the same, already-normalised array.
softLayer1 = softMax(matrix=layer1)

In [8]:
# Layer 2: 10 neurons fed by the softmax output of layer 1.
layer2, weights2 = hiddenLayer(neurons=10, matrix=softLayer1)
# As with layer 1, softMax overwrites `layer2`; `softLayer2` is the same array.
softLayer2 = softMax(matrix=layer2)

In [9]:
# print(layer2[5])

In [10]:
# Bias term: an all-ones array with the same shape as the layer-2 output,
# added element-wise to form the pre-activation of the output layer.
# NOTE(review): softmax is unchanged by adding the same constant to every
# entry of a row, so an all-ones bias cannot affect the softMax applied to
# `outputMatrix` in the next cell — confirm this is the intended starting point.
biasMatrix = np.full(softLayer2.shape, 1.0)
outputMatrix = softLayer2 + biasMatrix

In [11]:
# Final network output: row-wise softmax of the biased layer-2 activations.
# softMax overwrites its argument, so `outputMatrix` and `softOput` are the
# same array after this line.
softOput = softMax(matrix=outputMatrix)

# Back Propagation 1

In [12]:
# Categorical cross-entropy (CCE) of the network output against the
# one-hot encoded training labels.
Y_train_onehot = OneHotEncoder(matrix=Y_train_np)
output_square = Y_train_onehot  # alias of the one-hot targets (same array)
logOput = np.log(softOput)
# Per-sample loss: -sum_k y_k * log(p_k); then the mean over all samples.
CCE = -(output_square * logOput).sum(axis=1)
CCE_avg = CCE.mean()

In [13]:
# ∂CCE/∂W and ∂CCE/∂b terms for the 2nd hidden layer, built as an
# element-wise product of three chain-rule factors and averaged over samples.
# NOTE(review): `-logOput` is the per-class loss term, not ∂CCE/∂p (which
#   would be -y/p) — confirm this factor is what was intended.
# NOTE(review): `layer2` was passed to softMax earlier, which overwrites its
#   argument, so it holds post-softmax values here rather than raw weighted sums.
# NOTE(review): `gradient_w` has the shape of `layer2` (samples x 10), not the
#   shape of `weights2` — confirm before using it as a weight gradient.
partial_cost_predictions = -logOput
partial_activation_weightsum = np.ones(shape=logOput.shape)
partial_weightsum_weight = layer2
partial_weightsum_bias = np.ones(shape=layer2.shape)

gradient_w = (partial_cost_predictions * partial_activation_weightsum) * partial_weightsum_weight
gradient_b = (partial_cost_predictions * partial_activation_weightsum) * partial_weightsum_bias
# Average over the sample axis, keeping a leading axis of length 1.
avg_grad_w = gradient_w.mean(axis=0, keepdims=True)
avg_grad_b = gradient_b.mean(axis=0, keepdims=True)

In [29]:
# ∂CCE/∂W for the 1st hidden layer.
# Output error averaged over the batch: (p - y) / n_samples.
# The parentheses matter: without them only the one-hot targets were scaled
# by 1/n (softOput - Y/n), because `*` binds tighter than `-`.
partial_cost_predictions2_sum2 = (softOput - Y_train_onehot) / Y_train_onehot.shape[0]
# Push the error back through the layer-2 weights: (samples x 10) @ (10 x 25).
partial_weightsum2_weight2 = weights2
temp = partial_cost_predictions2_sum2 @ partial_weightsum2_weight2
# Layer 1's weighted sum is X @ weights1.T, so its derivative w.r.t. weights1
# is the input data itself.
partial_weightsum1_weight1 = X_train_np
# Result has the same shape as weights1: (25 x n_features).
# NOTE(review): the derivative of the layer-1 softMax activation is not part
#   of this chain — confirm whether that omission is intentional.
gradient_w1 = temp.T @ partial_weightsum1_weight1

In [31]:
# Gradient-descent step with learning rate `eta`.
eta = 0.1

# Layer-2 weights: the (1 x 10) averaged gradient broadcasts against
# weights2.T (25 x 10); the result is transposed back to weights2's layout.
weights2_2 = (weights2.T - eta * avg_grad_w).T
# The (1 x 10) averaged bias gradient broadcasts across every row of biasMatrix.
bias_2 = biasMatrix - eta * avg_grad_b

# Layer-1 weights: updated in weights1's layout, then stored transposed.
weights1_2 = (weights1 - eta * gradient_w1).T
print(weights1_2)

[[-0.67533149  0.11666411  0.7228179  ...  0.54347909 -0.20669654
   0.66041637]
 [ 0.83713727  0.53910653  0.76926672 ... -0.48300898 -0.82072687
  -0.38406557]
 [-0.5023125  -0.44038706  0.72621272 ...  0.88206873 -0.23032368
  -0.21242735]
 ...
 [ 0.11413399 -0.89505935  0.6137933  ... -0.93735846 -0.89964051
   0.42357732]
 [ 0.1997963  -0.02074437  0.85047283 ...  0.935407    0.71521986
   0.87707217]
 [ 0.81946068 -0.08832096 -0.70074192 ... -0.45948359 -0.27952952
  -0.51354605]]


# Forward Propagation 2