<a href="https://colab.research.google.com/github/flanaganc04/My-own-Neural-Net/blob/main/Digit_Recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Packages

In [2]:
from google.colab import drive
import os
import datetime
# import matplotlib as mpl
# import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import math

# Mount Google Drive

In [3]:
# Mount Google Drive at /content/drive so that the CSV files read later
# (under /content/drive/MyDrive/...) are reachable from this runtime.
drive.mount('/content/drive')

Mounted at /content/drive


# **Functions**

## Read Data Files

In [4]:
def readData(train_data: pd.DataFrame, test_data: pd.DataFrame) -> tuple:
    """
    Split the training and testing DataFrames into scaled feature arrays
    and label arrays, ready for model training and evaluation.

    Parameters
    ----------
    train_data : pd.DataFrame
        Training data. The first column holds the target labels and the
        remaining columns hold the feature values.
    test_data : pd.DataFrame
        Testing data, laid out the same way as ``train_data``.

    Returns
    -------
    tuple of np.ndarray
        ``(X_train, Y_train, X_test, Y_test)``. Each ``X`` array is the
        feature columns multiplied by ``1 / 255`` (which maps 0-255 values
        onto the [0, 1] range). Each ``Y`` array is the label column kept
        two-dimensional, i.e. with shape ``(n_samples, 1)``.
    """
    scale = 1 / 255

    def split(frame: pd.DataFrame) -> tuple:
        # Column 0 is the label; slicing with 0:1 keeps it 2-D.
        features = frame.iloc[:, 1:].to_numpy() * scale
        labels = frame.iloc[:, 0:1].to_numpy()
        return features, labels

    X_train_np, Y_train_np = split(train_data)
    X_test_np, Y_test_np = split(test_data)

    return X_train_np, Y_train_np, X_test_np, Y_test_np


## Hidden Layer

In [5]:
def hiddenLayer(neurons: int, matrix: np.ndarray) -> tuple:
    """
    Draw a random weight matrix and apply it to the input matrix.

    The weights are kept alongside the layer output because they are needed
    again later, during backpropagation.

    Parameters
    ----------
    neurons : int
        Number of neurons in the layer.
    matrix : np.ndarray
        Input data of shape ``(n_samples, n_inputs)`` (e.g., training data
        or the output of the previous layer).

    Returns
    -------
    tuple of np.ndarray
        ``(hiddenMatrix, weightMatrix)`` where ``weightMatrix`` has shape
        ``(neurons, n_inputs)`` with entries drawn uniformly from [-1, 1),
        and ``hiddenMatrix = matrix @ weightMatrix.T`` has shape
        ``(n_samples, neurons)``. No activation function is applied here.
    """
    n_inputs = matrix.shape[1]

    # One row of weights per neuron, one column per input feature.
    weightMatrix = np.random.uniform(low=-1.0, high=1.0, size=(neurons, n_inputs))

    # Weighted sums (pre-activations) for every sample.
    hiddenMatrix = matrix @ weightMatrix.T

    return hiddenMatrix, weightMatrix

## Soft Max

In [6]:
def softMax(matrix: np.ndarray) -> np.ndarray:
    """
    Apply the softmax function along the last axis (i.e. to each row of a
    2-D matrix): ``out[i, j] = exp(x[i, j]) / sum_k exp(x[i, k])``.

    Parameters
    ----------
    matrix : np.ndarray
        Array to which softmax should be applied (e.g., hidden layer or
        output layer pre-activations). It is not modified.

    Returns
    -------
    np.ndarray
        A new floating-point array of the same shape whose rows each sum
        to 1.
    """
    # Work in floating point so that integer input is not truncated.
    values = np.asarray(matrix, dtype=float)
    if values.size == 0:
        # Nothing to normalise; avoid reducing over an empty axis.
        return values.copy()

    # Subtracting the row maximum leaves the result mathematically unchanged
    # but keeps exp() from overflowing for large inputs.
    shifted = values - np.max(values, axis=-1, keepdims=True)
    exponentials = np.exp(shifted)
    return exponentials / np.sum(exponentials, axis=-1, keepdims=True)

## Forward Prop

In [19]:
def ForwardProp(neurons: list, training_data: np.ndarray, num_classes: int = 10) -> tuple:
    """
    Run one forward pass through a freshly initialised network.

    Every layer draws new random weights via ``hiddenLayer`` and is followed
    by ``softMax``. A bias (all ones) is added only to the output layer.

    Parameters
    ----------
    neurons : sequence of int
        Width of each hidden layer, in order: ``[n_1, n_2, ..., n_i]``.
        May be empty, in which case the output layer is applied directly
        to ``training_data``.
    training_data : np.ndarray
        Input samples of shape ``(n_samples, n_features)``.
    num_classes : int, optional
        Number of output neurons. Defaults to 10.

    Returns
    -------
    layers : list of np.ndarray
        Raw (pre-activation) outputs of each layer; the last entry already
        includes the output bias.
    weights : list of np.ndarray
        Weight matrix of each layer, in the same order as ``layers``.
    biases : list of np.ndarray
        A single-element list holding the output-layer bias of shape
        ``(1, num_classes)``; hidden layers have no bias.
    softlayers : list of np.ndarray
        Softmax-activated outputs of each layer.
    """
    layers = []
    weights = []
    softlayers = []
    biases = []

    # Hidden layers: each one consumes the activations of the previous one,
    # starting from the raw input.
    activations = training_data
    for width in neurons:
        layer, weight = hiddenLayer(width, activations)
        activations = softMax(layer)
        layers.append(layer)
        weights.append(weight)
        softlayers.append(activations)

    # Output layer: the only layer that gets a bias term.
    bias = np.ones((1, num_classes))
    layer, weight = hiddenLayer(num_classes, activations)
    layer = layer + bias
    layers.append(layer)
    weights.append(weight)
    biases.append(bias)
    softlayers.append(softMax(layer))

    return layers, weights, biases, softlayers


## One Hot Encoder

In [8]:
def OneHotEncoder(matrix: np.ndarray, num_classes=None) -> np.ndarray:
    """
    Convert an array of integer class labels into a one-hot encoded matrix
    (one "one-hot vector" per sample).

    Parameters
    ----------
    matrix : np.ndarray
        Integer class labels, shape ``(n_samples,)`` or ``(n_samples, 1)``.
        Values are cast to ``int`` and flattened.
    num_classes : int, optional
        Width of the encoding. When omitted it is inferred as
        ``max(label) + 1``, which assumes the largest class is present in
        ``matrix``; pass it explicitly when that is not guaranteed.

    Returns
    -------
    one_hot_matrix : np.ndarray
        Array of shape ``(n_samples, num_classes)`` with a single 1.0 per
        row. Empty input with no ``num_classes`` yields shape ``(0, 0)``.

    Raises
    ------
    ValueError
        If any label is negative, or is not smaller than ``num_classes``.
    """
    # Ensure integer dtype (in case it's float or object from pandas).
    labels = np.asarray(matrix).astype(int).flatten()

    if labels.size and labels.min() < 0:
        # Negative indices would silently wrap around to the last classes.
        raise ValueError("class labels must be non-negative integers")

    if num_classes is None:
        # Assumes classes are labelled 0..N and that N occurs in the input.
        num_classes = int(labels.max()) + 1 if labels.size else 0
    elif labels.size and labels.max() >= num_classes:
        raise ValueError(
            f"label {int(labels.max())} is out of range for num_classes={num_classes}"
        )

    # Row k of the identity matrix is the one-hot vector for class k.
    one_hot_matrix = np.eye(num_classes)[labels]
    return one_hot_matrix

## Categorical Cross Entropy

In [9]:
def CCE(y_train, softlayers):
    """
    Compute the mean categorical cross-entropy of the network output.

    Parameters
    ----------
    y_train : np.ndarray
        Integer class labels, one per sample (passed to ``OneHotEncoder``).
    softlayers : list of np.ndarray
        Softmax outputs of each layer; only the last entry (the network
        output, one row of class probabilities per sample) is used.

    Returns
    -------
    Y_train_onehot : np.ndarray
        One-hot encoding of ``y_train``.
    logOput : np.ndarray
        Natural log of the output probabilities. Probabilities are floored
        at the smallest positive normal float first, so this is always
        finite.
    CCE_avg : float
        Cross-entropy ``-sum(one_hot * log(p))`` per sample, averaged over
        all samples.
    """
    Y_train_onehot = OneHotEncoder(y_train)

    # log(0) is -inf and 0 * -inf is nan, which would poison the mean;
    # flooring at the tiniest positive float keeps every term finite.
    probabilities = np.maximum(softlayers[-1], np.finfo(float).tiny)
    logOput = np.log(probabilities)

    per_sample_loss = -np.sum(Y_train_onehot * logOput, axis=1)
    CCE_avg = np.mean(per_sample_loss)
    return Y_train_onehot, logOput, CCE_avg

## Back Prop

In [38]:
def BackProp(x_train: np.ndarray, y_train: np.ndarray, layers, weights, biases, softlayers, eta: float):
    """
    Perform one gradient-descent step on the mean categorical cross-entropy
    for the network produced by ``ForwardProp``.

    The network is assumed to compute, for layer ``i``,
    ``layers[i] = previous_activation @ weights[i].T`` (plus ``biases[-1]``
    on the last layer only) followed by a row-wise softmax, with
    ``x_train`` as the activation feeding layer 0.

    Parameters
    ----------
    x_train : np.ndarray
        Input samples of shape ``(n_samples, n_features)``.
    y_train : np.ndarray
        Integer class labels, shape ``(n_samples,)`` or ``(n_samples, 1)``.
    layers : list of np.ndarray
        Pre-activation outputs of each layer. Accepted for interface
        compatibility; the softmax derivative only needs ``softlayers``.
    weights : list of np.ndarray
        Weight matrix of each layer, ``weights[i]`` having shape
        ``(n_out_i, n_in_i)``.
    biases : list of np.ndarray
        Bias list whose last entry is the output-layer bias. Any earlier
        entries are returned unchanged.
    softlayers : list of np.ndarray
        Softmax outputs of each layer; ``softlayers[-1]`` holds the class
        probabilities.
    eta : float
        Learning rate.

    Returns
    -------
    updated_weights : list of np.ndarray
        New weight matrices, in the same order and with the same shapes as
        ``weights``.
    updated_biases : list of np.ndarray
        New biases, in the same order and with the same shapes as
        ``biases``.
    """
    probabilities = softlayers[-1]
    n_samples = probabilities.shape[0]

    # One-hot targets sized from the network output, so the width is right
    # even when the highest class is absent from this batch.
    labels = np.asarray(y_train).astype(int).ravel()
    targets = np.zeros(probabilities.shape)
    targets[np.arange(n_samples), labels] = 1.0

    # dLoss/d(pre-activation) of the output layer for softmax + mean
    # cross-entropy: (p - y) / N.
    delta = (probabilities - targets) / n_samples

    # The output bias is added to every sample, so its gradient is the
    # column-wise sum of delta.
    updated_biases = list(biases)
    if updated_biases:
        grad_b = np.sum(delta, axis=0, keepdims=True)
        updated_biases[-1] = biases[-1] - eta * grad_b

    updated_weights = [None] * len(weights)
    for index in range(len(weights) - 1, -1, -1):
        # Activation that fed this layer during the forward pass.
        layer_input = x_train if index == 0 else softlayers[index - 1]

        grad_w = delta.T @ layer_input  # same shape as weights[index]
        updated_weights[index] = weights[index] - eta * grad_w

        if index > 0:
            # Push the error back through the (pre-update) weights, then
            # through the softmax of the previous layer. For a softmax
            # output a and upstream gradient g the vector-Jacobian product
            # is a * (g - sum(g * a)).
            upstream = delta @ weights[index]
            activation = softlayers[index - 1]
            weighted = np.sum(upstream * activation, axis=1, keepdims=True)
            delta = activation * (upstream - weighted)

    return updated_weights, updated_biases

# Full Code

In [11]:
# Load the training and test CSV files from the mounted Google Drive folder.
# readData (called in a later cell) treats the first column of each frame as
# the label and the remaining columns as features.
train_data = pd.read_csv('/content/drive/MyDrive/Digit Recognizer/mnist_train.csv')
test_data = pd.read_csv('/content/drive/MyDrive/Digit Recognizer/mnist_test.csv')

neurons = [25] # hidden-layer widths, one entry per hidden layer: [n_1, n_2, ..., n_i]
eta = 0.1 # learning rate, passed to BackProp

In [39]:
# Split both DataFrames into scaled feature arrays and label arrays.
X_train, Y_train, X_test, Y_test = readData(train_data, test_data)
# Run one forward pass over the whole training set with newly drawn random weights.
layers, weights, biases, softlayers = ForwardProp(neurons, X_train)
# Mean categorical cross-entropy of that pass against the training labels.
Y_train_onehot, logOput, CCE_avg = CCE(Y_train, softlayers)
# Compute one weight/bias update using learning rate eta.
updated_weights, updated_biases = BackProp(X_train, Y_train, layers, weights, biases, softlayers, eta)
