# Problem Set 1 - Neural network implementation

As described in section "3 Neural network implementation" of assignment 1, the goal is to build a shallow neural network from scratch using different approaches. To validate that your code is working and that the network is actually learning something, please use the following MNIST classification task. Finally, please submit proof of the learning progress as described in the assignment.

## Imports

In [None]:
import random
import pandas as pd
import numpy as np
from sklearn import model_selection
import sklearn.datasets as sk_datasets
import torchvision.datasets as torch_datasets
from torchvision import transforms
import torch
import matplotlib.pyplot as plt

from scratch.network import Network
from scratch.res_network import ResNetwork
from pytorch.network import TorchNetwork
from scratch.utils import *

In [3]:
# Enable IPython's autoreload extension so that edits to the imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before the
# code of each cell is executed.
%autoreload 2

# Explicitly set seed for reproducibility.
# One shared value seeds every global random number generator this notebook
# has imported: Python's `random`, NumPy and PyTorch.
GLOBAL_RANDOM_STATE = 42

random.seed(GLOBAL_RANDOM_STATE)
np.random.seed(GLOBAL_RANDOM_STATE)
# PyTorch keeps its own global generator, which the two calls above do not
# touch; seed it as well so the torch part of the notebook is repeatable.
torch.manual_seed(GLOBAL_RANDOM_STATE)

## A) Neural Network Classifier from Scratch

### Data

In [4]:
# Number of samples kept for the experiments and number of digit classes.
N_SAMPLES = 1000
N_CLASSES = 10

# Download the OpenML 'mnist_784' dataset as NumPy arrays (as_frame=False);
# cache=True reuses the local copy on later runs.
x, y_cat = sk_datasets.fetch_openml('mnist_784', version=1, return_X_y=True, cache=True, as_frame=False)

# Labels are cast to integers so they can be used as class indices below.
y_cat = y_cat.astype(int)

# Use only a small subset of the data for faster training. Slicing first means
# the rescaling and the one-hot encoding only touch the rows that are kept.
# Pixel values are divided by 255 and stored as float32.
x = (x[:N_SAMPLES] / 255).astype('float32')

# One-hot encode y: row i of the identity matrix is the one-hot vector of
# class i, so indexing it with the labels encodes all samples at once.
y = np.eye(N_CLASSES)[y_cat[:N_SAMPLES]]

# Split data into train (80%) and validation (20%) set
x_train, x_val, y_train, y_val = model_selection.train_test_split(
    x, y, test_size=0.2, random_state=GLOBAL_RANDOM_STATE
)



### ML Model & Training

In [None]:
# Baseline: feedforward network with layer sizes 784-128-64-10, configured
# for 50 epochs at a learning rate of 0.1.
fnn = Network(
    sizes=[784, 128, 64, 10],
    learning_rate=0.1,
    epochs=50,
)

# Fit on the training split and pass the validation split along; the cosine
# annealing learning-rate schedule is switched off for this run.
fnn.fit(
    x_train,
    y_train,
    x_val,
    y_val,
    cosine_annealing_lr=False,
)

### Test cosine annealing scheduler

In [None]:
# Train a fresh network with cosine annealing instead of calling `fit` again
# on `fnn`, which has already been trained above. Reusing `fnn` would make the
# two runs incomparable.
# NOTE(review): assumes `Network.fit` continues from the current weights
# rather than re-initialising them — confirm against scratch/network.py.
fnn_cosine = Network(sizes=[784, 128, 64, 10], learning_rate=0.1, epochs=50)
fnn_cosine.fit(x_train, y_train, x_val, y_val, cosine_annealing_lr=True)

### Test residual neural network

In [None]:
# Residual network with layer sizes 784-128-128-10, configured for 50 epochs.
# NOTE(review): learning_rate=1 is ten times the 0.1 used for the residual
# network in the comparison cell of section C — confirm this is intended.
res_nn = ResNetwork(
    sizes=[784, 128, 128, 10],
    learning_rate=1,
    epochs=50,
)

# Fit on the training split and pass the validation split along.
res_nn.fit(
    x_train,
    y_train,
    x_val,
    y_val,
)

## B) Neural Network Classifier using Torch

### Data

In [48]:
# Preprocessing pipeline applied to every image: convert it to a tensor, then
# normalise with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Download MNIST into ./data (training split and held-out split) and keep only
# the first 1000 samples of each for faster training.
train_set = torch.utils.data.Subset(
    torch_datasets.MNIST('data', train=True, download=True, transform=transform),
    range(1000),
)
val_set = torch.utils.data.Subset(
    torch_datasets.MNIST('data', train=False, download=True, transform=transform),
    range(1000),
)

# Utilize PyTorch DataLoader for simplified & harmonized loading of data;
# batch_size=1 yields one sample at a time.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=1)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=1)


### ML Model & Training

In [None]:
# PyTorch counterpart of the from-scratch feedforward network: same layer
# sizes (784-128-64-10) and 50 epochs, with a learning rate of 0.2 and the
# notebook-wide seed passed as random_state.
torch_nn = TorchNetwork(
    sizes=[784, 128, 64, 10],
    learning_rate=0.2,
    epochs=50,
    random_state=GLOBAL_RANDOM_STATE,
)

# Fit using the training loader and pass the validation loader along.
torch_nn.fit(
    train_loader,
    val_loader,
)

## C) Visualize accuracy & hyperparameter tuning

Here, you should compare the accuracy of all trained models. Optionally, you can also show the results of hyperparameter tuning and comment on which hyperparameters work best for this task.

In [None]:
    "### BEGIN SOLUTION ###\n",
    "\n",
    "# Feedforward NN\n",
    "fnn = Network(sizes=[784, 128, 64, 10], learning_rate=0.1, epochs=50)\n",
    "fnn.fit(x_train, y_train, x_val, y_val)\n",
    "\n",
    "# Feedforward NN + Cosine Annealing\n",
    "fnn_cosine = Network(sizes=[784, 128, 64, 10], learning_rate=0.1, epochs=50)\n",
    "fnn_cosine.fit(x_train, y_train, x_val, y_val, cosine_annealing_lr=True)\n",
    "\n",
    "# Residual NN\n",
    "res_nn = ResNetwork(sizes=[784, 128, 128, 10], learning_rate=0.1, epochs=50)\n",
    "res_nn.fit(x_train, y_train, x_val, y_val)\n",
    "\n",
    "# PyTorch NN\n",
    "torch_nn = TorchNetwork(sizes=[784, 128, 64, 10], learning_rate=0.2, epochs=50, random_state=GLOBAL_RANDOM_STATE)\n",
    "torch_nn.fit(train_loader, val_loader)\n",
    "\n",
    "# Compute accuracies for all the models\n",
    "fnn_train_acc = float(fnn.compute_accuracy(x_train, y_train))\n",
    "fnn_val_acc = float(fnn.compute_accuracy(x_val, y_val))\n",
    "\n",
    "fnn_cosine_train_acc = float(fnn_cosine.compute_accuracy(x_train, y_train))\n",
    "fnn_cosine_val_acc = float(fnn_cosine.compute_accuracy(x_val, y_val))\n",
    "\n",
    "res_nn_train_acc = float(res_nn.compute_accuracy(x_train, y_train))\n",
    "res_nn_val_acc = float(res_nn.compute_accuracy(x_val, y_val))\n",
    "\n",
    "torch_train_acc = torch_nn.predict(train_loader)\n",
    "torch_val_acc = torch_nn.predict(val_loader)\n",
    "\n",
    "# Build dictionary\n",
    "results = {\n",
    "    \"Feedforward NN\": (fnn_train_acc, fnn_val_acc),\n",
    "    \"Feedforward + CosineAnnealing\": (fnn_cosine_train_acc, fnn_cosine_val_acc),\n",
    "    \"Residual NN\": (res_nn_train_acc, res_nn_val_acc),\n",
    "    \"Torch NN\": (torch_train_acc, torch_val_acc)\n",
    "}\n",
    "\n",
    "# Plot\n",
    "models = list(results.keys())\n",
    "train_accs = [results[m][0] for m in models]\n",
    "val_accs = [results[m][1] for m in models]\n",
    "\n",
    "x = np.arange(len(models))\n",
    "width = 0.35\n",
    "\n",
    "plt.figure(figsize=(10,6))\n",
    "plt.bar(x - width/2, train_accs, width, label='Train')\n",
    "plt.bar(x + width/2, val_accs, width, label='Validation')\n",
    "\n",
    "plt.xticks(x, models, rotation=20)\n",
    "plt.ylabel(\"Accuracy\")\n",
    "plt.title(\"MNIST Classification Accuracy\")\n",
    "plt.legend()\n",
    "plt.grid(axis='y')\n",
    "plt.show()\n",
    "\n",
    "### END SOLUTION ###\n"
