In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load and preprocess the data
df = pd.read_csv("mnist_train.csv")
df = df.dropna(inplace=False)

X = df.iloc[:, 1:]
y = df.iloc[:, 0]

X = np.array(X) / 255
y = np.array(y)

def one_hot_encode(y):
    one_hot = np.zeros((len(y), 10))
    for i in range(len(y)):
        one_hot[i][y[i]] = 1
    return one_hot

y = one_hot_encode(y)

print(f"y Shape : {y.shape}")
print(f"X Shape : {X.shape}")

# Define network architecture
layers = [784, 28, 20, 10]
weights = []
biases = []

N = len(layers)
index = 1

while index < N:
    weights.append(np.random.randn(layers[index-1], layers[index]))
    biases.append(np.random.randn(layers[index]))
    index += 1

for layer, weight in enumerate(weights):
    print(f"W{layer+1} = {weight.shape}\n")

for layer, bias in enumerate(biases):
    print(f"B{layer+1} = {bias.shape}\n")

# Activation functions and derivatives
def ReLu(x):
    return np.maximum(0, x)

def ReLu_derivative(x):
    return np.where(x > 0, 1, 0)

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / np.sum(e_x, axis=0)

def softmax_derivative(x):
    sm = softmax(x)
    return sm * (1 - sm)

def cost_function(a3, y):
    epsilon = 1e-15
    cost = -np.sum(y * np.log(a3 + epsilon)) / len(y)
    return cost

# Forward propagation
def forward_propagate(input, weights, biases):
    z1 = np.dot(weights[0].T, input) + biases[0]
    a1 = ReLu(z1)

    z2 = np.dot(weights[1].T, a1) + biases[1]
    a2 = ReLu(z2)

    z3 = np.dot(weights[2].T, a2) + biases[2]
    a3 = softmax(z3)

    z = [z1, z2, z3]
    a = [a1, a2, a3]

    return z, a

# Backward propagation
def backward_propagate(z, a, y):
    delta3 = a[2] - y
    delta2 = np.dot(weights[2], delta3) * ReLu_derivative(z[1])
    delta1 = np.dot(weights[1], delta2) * ReLu_derivative(z[0])

    dw3 = np.dot(delta3.reshape(10, 1), a[1].reshape(1, 20)).T / len(y)
    db3 = np.sum(delta3, keepdims=True) / len(y)

    dw2 = np.dot(delta2.reshape(20, 1), a[0].reshape(1, 28)).T / len(y)
    db2 = np.sum(delta2, keepdims=True) / len(y)

    dw1 = np.dot(delta1.reshape(28, 1), X[i].reshape(1, 784)).T / len(y)
    db1 = np.sum(delta1, keepdims=True) / len(y)

    return dw1, dw2, dw3, db1, db2, db3

# Update parameters
def update_parameters(weights, biases, dw1, dw2, dw3, db1, db2, db3, alpha):
    weights[0] = weights[0] - alpha * dw1
    weights[1] = weights[1] - alpha * dw2
    weights[2] = weights[2] - alpha * dw3
    biases[0] = biases[0] - alpha * db1
    biases[1] = biases[1] - alpha * db2
    biases[2] = biases[2] - alpha * db3
    return weights, biases


y Shape : (39003, 10)
X Shape : (39003, 784)
W1 = (784, 28)

W2 = (28, 20)

W3 = (20, 10)

B1 = (28,)

B2 = (20,)

B3 = (10,)



In [35]:
for epoch in range(1):
    for i in range(len(X)):
      input = X[i]
      z, a = forward_propagate(input, weights, biases)
      cost = cost_function(a[2], y[i])
      dw1, dw2, dw3, db1, db2, db3 = backward_propagate(z, a, y[i])
      weights, biases = update_parameters(weights, biases, dw1, dw2, dw3, db1, db2, db3, 0.001)
    print(f"Cost : {cost}")

Cost : 1.481344782460555e-05


In [4]:
def predict(input, weights, biases):
    _, a = forward_propagate(input, weights, biases)
    return np.argmax(a[2])

def calculate_accuracy(X, y, weights, biases):
    predictions = np.array([predict(X[i], weights, biases) for i in range(len(X))])
    true_labels = np.argmax(y, axis=1)
    accuracy = np.mean(predictions == true_labels)
    return accuracy

In [41]:
df = pd.read_csv("mnist_test.csv")
df = df.dropna(inplace=False)

X_test = df.iloc[:, 1:]
y_test = df.iloc[:, 0]

X_test = np.array(X_test) / 255
y_test = np.array(y_test)
y_test = one_hot_encode(y_test)

accuracy = calculate_accuracy(X_test, y_test, weights, biases)
print(f"Training Accuracy: {accuracy * 100:.2f}%")

Training Accuracy: 94.06%


In [38]:
# Read weights from CSV files
weight_dfs = [pd.read_csv(f"weight_{i+1}.csv") for i in range(3)]
weights_loaded = [df.to_numpy() for df in weight_dfs]

# Read biases from CSV files
bias_dfs = [pd.read_csv(f"bias_{i+1}.csv") for i in range(3)]
biases_loaded = [df.to_numpy().flatten() for df in bias_dfs]

# Load weights and biases
weights = weights_loaded
biases = biases_loaded