In [10]:
# -----------------------------
# MNIST DIGIT CLASSIFIER (PyTorch)
# -----------------------------

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import gradio as gr

In [11]:
# -----------------------------
# 1. LOAD DATA
# Transforms are preprocessing steps that are applied automatically to every
# image you load from a dataset.
# Think of transforms as a recipe that says:
#
#   “Every time you give me an image, do X, then Y, then Z to it.”
#
# Here the recipe is: “For every MNIST image, convert it to a PyTorch tensor.”
# MNIST images come in as PIL images (Python Imaging Library),
# but your neural network expects tensors.
# -----------------------------
# Single-step pipeline: turn each incoming image into a tensor.
transform = transforms.Compose([transforms.ToTensor()])

In [12]:
# Training split of MNIST; fetched into ./data on first use.
train_dataset = datasets.MNIST("./data", train=True, transform=transform, download=True)


In [13]:
# Held-out test split of MNIST, preprocessed with the same transform as training.
test_dataset = datasets.MNIST("./data", train=False, transform=transform, download=True)

In [14]:
# Batch the datasets. Only the training data is shuffled; the test data keeps
# its original order.
BATCH_SIZE = 128
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

# Show the labels present in the training set, read from the dataset's
# `classes` attribute. Note: despite its name, `num_outputs` holds the list of
# class-name strings, not a count.
num_outputs = train_dataset.classes
print(num_outputs)


['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']


In [15]:
# -----------------------------
# 2. DEFINE NEURAL NETWORK
# TODO: Design a Neural Network with 1 hidden layer of 128 neurons
# -----------------------------
class SimpleNN(nn.Module):
    """Fully connected classifier: input -> sigmoid layer -> ReLU layer -> logits.

    Args:
        n_features: Number of input values per sample once the image is flattened.
        n_hidden: Width of each of the two hidden layers.
        n_outputs: Number of values produced per sample (one raw score per class).
    """

    def __init__(self, n_features, n_hidden, n_outputs):
        super().__init__()

        # Two hidden layers of equal width followed by the output layer.
        self.fc1 = nn.Linear(n_features, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        self.fc3 = nn.Linear(n_hidden, n_outputs)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, n_outputs).

        No softmax is applied here; the scores are returned as produced by the
        final linear layer.
        """
        # Flatten each sample, e.g. (batch, 1, 28, 28) -> (batch, 784).
        # The width is taken from fc1 so the model honours `n_features` rather
        # than a hard-coded 28*28; `reshape` also accepts non-contiguous input.
        x = x.reshape(-1, self.fc1.in_features)

        # Activations between the linear layers.
        z1 = torch.sigmoid(self.fc1(x))
        z2 = F.relu(self.fc2(z1))

        # Output layer.
        return self.fc3(z2)

In [16]:
# TODO: Create the model

# 28x28 pixel inputs, 128 units per hidden layer, 10 output scores.
model = SimpleNN(
    n_features=784,
    n_hidden=128,
    n_outputs=10,
)

In [17]:
# -----------------------------
# 3. LOSS FUNCTION + OPTIMIZER
# -----------------------------
# TODO: Define your loss function
# Cross-entropy loss, computed from the model's raw output scores and the
# integer class labels.
loss_function = nn.CrossEntropyLoss()

# TODO: Setup your gradient descent. Try different values for the learning rate
# SGD with momentum over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.03, momentum=0.9)

In [18]:
# ------------------------------
# 4. TRAINING LOOP
# ------------------------------

# Mini-batch training. The mean batch loss of each epoch is accumulated into a
# reporting window of `epochs_per_print` epochs, and one line is printed per
# window. A final, shorter window is also reported when `epochs` is not a
# multiple of `epochs_per_print`, so no trailing epochs are silently dropped.
epochs = 80
epochs_per_print = 10          # how many epochs to average over

group_loss = 0.0               # sum of per-epoch mean losses in the current window
epochs_in_group = 0            # number of epochs accumulated in the current window

for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    num_batches = 0

    for images, labels in train_loader:
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Compute loss
        loss = loss_function(outputs, labels)

        # Backprop + update
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # average loss for this epoch
    avg_loss = total_loss / num_batches

    # accumulate into the current reporting window
    group_loss += avg_loss
    epochs_in_group += 1

    # report when the window is full, or when training ends mid-window
    end_epoch = epoch + 1
    if epochs_in_group == epochs_per_print or end_epoch == epochs:
        group_avg_loss = group_loss / epochs_in_group
        start_epoch = end_epoch - epochs_in_group + 1
        print(f"Epochs {start_epoch}-{end_epoch}, Avg Loss: {group_avg_loss:.4f}")

        # reset for next window
        group_loss = 0.0
        epochs_in_group = 0

Epochs 1-10, Avg Loss: 0.2427
Epochs 11-20, Avg Loss: 0.0603
Epochs 21-30, Avg Loss: 0.0268
Epochs 31-40, Avg Loss: 0.0114
Epochs 41-50, Avg Loss: 0.0049
Epochs 51-60, Avg Loss: 0.0025
Epochs 61-70, Avg Loss: 0.0016
Epochs 71-80, Avg Loss: 0.0011


In [19]:
# -----------------------------
# 5. EVALUATE ON THE TEST SET
# -----------------------------
# Count how many test images are assigned their true label.
model.eval()
correct, total = 0, 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)

        # Predicted class = position of the largest score in each row.
        predicted = outputs.argmax(dim=1)

        correct += int((predicted == labels).sum())
        total += labels.shape[0]

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 97.83%


In [None]:
import gradio as gr
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
from PIL import Image, ImageOps

# -----------------------------
# 6. TEST SINGLE PREDICTION
# -----------------------------
# The Gradio Sketchpad gives you:
#   * a full-color NumPy array
#   * a black digit on a white background
#   * a large resolution
#   * no consistent scale
#
# Hence the preprocessing below, which converts the drawing into the
# 28x28 single-channel format used for the MNIST images.
# ------------------------------

def preprocess_image(image):
    """Convert a Sketchpad drawing into a single-image batch for the model.

    Args:
        image: Either a NumPy image array, or a dict payload whose
            'composite' entry holds that array.

    Returns:
        A tensor of shape (1, 1, 28, 28) with values in [0, 1], colour-inverted
        relative to the drawing.
    """
    # Gradio Sketchpad sometimes passes a dict with 'composite'
    if isinstance(image, dict):
        image = image['composite']   # this is a NumPy array

    pil_image = transforms.ToPILImage()(image)    # NumPy → PIL

    # The grayscale conversion below ignores any alpha channel, so a
    # transparent canvas would be read as black rather than white. Flatten
    # alpha-bearing images onto an opaque white canvas first; fully opaque
    # drawings are unaffected.
    # NOTE(review): whether the Sketchpad emits a transparent background
    # depends on the installed Gradio version — confirm.
    if pil_image.mode in ("RGBA", "LA"):
        rgba_image = pil_image.convert("RGBA")
        white_canvas = Image.new("RGBA", rgba_image.size, (255, 255, 255, 255))
        pil_image = Image.alpha_composite(white_canvas, rgba_image)

    sketch_transform = transforms.Compose([
        transforms.Grayscale(),                # ensure 1 channel
        transforms.Resize((28, 28)),           # 28x28 like MNIST
        transforms.Lambda(ImageOps.invert),    # invert colors
        transforms.ToTensor(),                 # → tensor, shape (1,28,28), values in [0,1]
    ])

    # Apply the preprocessing transform
    img_tensor = sketch_transform(pil_image)  # (1, 28, 28)

    # Add batch dimension → (1, 1, 28, 28)
    return img_tensor.unsqueeze(0)

def predict_digit(image):
    """Predict the digit drawn on the sketchpad.

    Args:
        image: The sketchpad payload: None, a NumPy image array, or a dict
            whose 'composite' entry holds the image.

    Returns:
        The predicted digit as a string, or the prompt "Draw something!" when
        there is no drawing to classify.
    """
    # --- STEP 1: CHECK IF SOMETHING HAS BEEN DRAWN ---
    if image is None:
        return "Draw something!"
    # A dict payload can arrive without image data; preprocessing would fail
    # on it, so treat it the same as no drawing.
    # NOTE(review): presumably this is what an untouched canvas sends — confirm
    # against the installed Gradio version.
    if isinstance(image, dict) and image.get('composite') is None:
        return "Draw something!"

    # --- STEP 2: PREPROCESS THE IMAGE ---
    img_tensor = preprocess_image(image)

    # --- STEP 3: RUN THE MODEL ---
    with torch.no_grad():
        prediction = model(img_tensor)

        # Index of the highest score along the class dimension; the batch
        # holds a single image, so this is one value.
        predicted_digit = torch.argmax(prediction, dim=1).item()

    return str(predicted_digit)

# UI setup: a sketchpad input wired to predict_digit, with a label output.
interface = gr.Interface(
    fn=predict_digit,
    inputs=gr.Sketchpad(label="Draw Here"),
    outputs="label",
)
interface.queue().launch()