# MNIST Neural Network from Scratch

This notebook implements a simple neural network to classify MNIST handwritten digits using a custom autograd engine. I've done a single pass of forward propagation and backpropagation as a proof of concept; if this were actually run as a full training loop, it would take too long.

## Features
- Loads MNIST data from CSV files
- Implements a 2-layer neural network (784 → 256 → 10)
- Uses custom micrograd engine for automatic differentiation
- Demonstrates basic deep learning concepts with minimal dependencies

## Requirements
- Python 3.x
- CSV module
- Custom micrograd implementation from the learning module

## Usage
Run the cells sequentially to perform one training step on a small sample of MNIST data and compare the loss before and after the parameter update.


In [1]:
import csv
import os
import sys


# Absolute path of the project checkout. It is added to the import path here
# and is also the base directory for the data file opened further down.
PROJECT_ROOT = "/home/nlin/workspace/code/projects/autograd_cpp"
# Notebook cells get re-run; only register the path once so sys.path does not
# accumulate duplicate entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
def load_mnist_from_file(csvfile):
    """Parse an MNIST-style CSV stream into scaled pixels and one-hot labels.

    The first line is treated as a header and skipped. In every other row,
    column 0 is the digit label and the remaining columns are pixel
    intensities, which are divided by 255.

    Args:
        csvfile: An open text file (or any iterable of CSV lines that has a
            ``close()`` method). It is always closed before this function
            returns, including when parsing fails.

    Returns:
        A tuple ``(data, labels, n)`` where ``data`` is a list of pixel
        lists, ``labels`` is the matching list of 10-element one-hot lists,
        and ``n`` is the number of samples read.

    Raises:
        ValueError: If a cell cannot be converted to ``float``.
    """
    data = []
    labels = []
    try:
        reader = csv.reader(csvfile)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines (e.g. a stray empty line
            # at the end of the file); there is no label to read from those.
            if not row:
                continue
            label, *pixels = (float(cell) for cell in row)
            data.append([p * 1.0 / 255 for p in pixels])
            labels.append([1 if label == digit else 0 for digit in range(10)])
    finally:
        # The caller hands over ownership of the handle, so release it even
        # if a row failed to parse.
        csvfile.close()

    return data, labels, len(data)


# Load the sample set. The ``with`` block releases the handle even if the
# loader raises before reaching its own close(); closing twice is harmless.
# ``newline=""`` is the mode the csv module documents for files it reads.
with open(os.path.join(PROJECT_ROOT, "data/mnist_test_short.csv"), newline="") as csvfile:
    xs, ys, n = load_mnist_from_file(csvfile)

In [3]:
from learning.micrograd_engine import Value
from learning.micrograd_nn import Layer

# Layer sizes: 28 * 28 = 784 inputs -> 256 hidden units -> 10 outputs.
nin = 28 * 28
# Built left to right, so W1 is constructed before W2.
W1, W2 = Layer(nin, 256), Layer(256, 10)

In [4]:
def softmax(x):
    """Scale 10 raw scores by their exponentials.

    Each element is exponentiated and divided by the sum of all the
    exponentials plus a small epsilon, so the outputs add up to slightly
    less than 1.

    Args:
        x: Sequence of exactly 10 values that provide ``exp()``, can be
            added to a float, and can be divided by one another.

    Returns:
        A list of 10 values, one per input, in the same order.
    """
    assert len(x) == 10
    epsilon = 1e-4
    # Exponentiate each input once and reuse the result for both the
    # numerator and the denominator, instead of building every exp() twice.
    exps = [i.exp() for i in x]
    denom = sum(exps, epsilon)
    return [e / denom for e in exps]


def forward(x):
    """Run one input through both layers and return the softmax output.

    ``W1`` is applied with ``relu=True``, ``W2`` with ``relu=False``, and
    the result of ``W2`` is passed to ``softmax``.
    """
    hidden = W1(x, relu=True)
    scores = W2(hidden, relu=False)
    return softmax(scores)

In [5]:
def cross_entropy_loss(preds):
    """Return the cross-entropy ``-sum(y_k * log(yp_k))`` for one sample.

    Args:
        preds: A ``(y, yp)`` pair. ``y`` holds the target weights (one-hot
            in this notebook) and ``yp`` the predicted values, each of
            which must provide ``log()``.

    Returns:
        The negated sum of ``y_k * yp_k.log()`` over the classes.
    """
    y, yp = preds
    pairs = list(zip(y, yp))
    # A class with target weight 0 contributes nothing (0 * log p = 0), so
    # its log is not taken at all. Otherwise a prediction that underflowed
    # to 0 on a non-target class would make log() fail for no reason.
    active = [(t, p) for t, p in pairs if t != 0]
    if not active:
        # No active class: evaluate every term as before so the result has
        # the same type the caller has always received.
        active = pairs
    return -sum([t * p.log() for t, p in active])


from concurrent.futures import ThreadPoolExecutor

def _evaluate_batch(inputs, targets, max_workers=4):
    """Run the forward pass and loss for every sample in a batch.

    Returns a tuple ``(predictions, per_sample_losses, total_loss)`` where
    ``total_loss`` is the per-sample losses summed onto ``Value(0.0)``.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        predictions = list(executor.map(forward, inputs))
        per_sample = list(
            executor.map(cross_entropy_loss, zip(targets, predictions))
        )
    return predictions, per_sample, sum(per_sample, Value(0.0))


# Work on the first 10 samples only.
xs = xs[:10]
ys = ys[:10]

# Loss before the update.
ypred, losses, loss = _evaluate_batch(xs, ys)

print(loss)

loss.backward()

# One plain gradient-descent step with step size alpha, then clear the
# gradients so they do not carry over into a later backward pass.
alpha = 0.01
params = [*W1.parameters(), *W2.parameters()]

for value in params:
    value.data -= alpha * value.grad
    value.grad = 0.0

# Loss after the update, computed the same way for comparison.
new_ypred, new_losses, new_loss = _evaluate_batch(xs, ys)

print(new_loss)

Value(data=477.56915473196807)
Value(data=93.90524682789142)
