# Comparison of optimization algorithms
In this example, a classic neural network is created using the `neural` framework. This network is then trained on the MNIST data set of hand-written digits, using the SGD and Adam algorithms in order to compare their convergence rates.

In [None]:
import matplotlib.pyplot as plt

In [None]:
import sys
sys.path.append("..")

In [None]:
import numpy as np
import time

from neural import MNIST, Tensor, nn, optim
from utils import *

## Importing MNIST training data

In [None]:
# Loading training set
# MNIST.get("train") yields the training images together with their labels.
allTrainImages, allTrainLabels = MNIST.get("train")
# Images are normalized, all values are in the range [-1, 1]
# NOTE(review): the [-1, 1] range assumes the raw pixel values lie in [0, 1]
# and that normalize(x, 0.5, 0.5) computes (x - 0.5) / 0.5 -- confirm against
# utils.normalize.
allTrainImages = normalize(allTrainImages, 0.5, 0.5)

## Defining the Neural Network architecture

In [None]:
class Network(nn.Module):
    """Three-layer fully connected network: 784 -> 128 -> 64 -> 10.

    Both hidden layers are followed by a ReLU activation, and dropout
    (p=0.2) is applied after the second one. The output of the last linear
    layer is returned as is.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Run `x` through the three linear layers and return the result of fc3."""
        # First hidden layer
        relu1 = nn.ReLU()
        hidden1 = relu1(self.fc1(x))

        # Second hidden layer, followed by dropout
        dropout = nn.Dropout(p=0.2)
        relu2 = nn.ReLU()
        hidden2 = dropout(relu2(self.fc2(hidden1)))

        # Output layer, no activation applied here
        return self.fc3(hidden2)
    
# Instantiate the network once here. Note that the training loop further down
# builds a new Network for each optimizer and rebinds `model`.
model = Network()

## Choosing training criterion (loss function)

In [None]:
# Loss function
# "mean" is passed as the reduction mode -- presumably the per-sample losses
# are averaged over the batch (TODO confirm against nn.CrossEntropyLoss).
reduction = "mean"
criterion = nn.CrossEntropyLoss(reduction=reduction)

## Optimization algorithms

### Choosing optimizer parameters

In [None]:
# Keyword arguments forwarded to optim.SGD
SGDSetup = {
    "lr": 0.03,
    "momentum": 0.9,
}

# Keyword arguments forwarded to optim.Adam
AdamSetup = {
    "lr": 0.001,
    "betas": (0.9, 0.999),
    "eps": 1e-08,
}

# One (optimizer class, keyword arguments, display name) entry per algorithm
configurations = (
    (optim.SGD, SGDSetup, "SGD"),
    (optim.Adam, AdamSetup, "Adam"),
)

## Training

### Choosing training parameters

In [None]:
# Requested number of training epochs (passes over the training data)
epochs = 1
# Number of images per mini-batch; the training set is cut into whole batches
# of this size, so images that do not fill a complete batch are left out
batchSize = 300

In [None]:
# Only complete batches are used; images beyond the last full batch are dropped
numBatches = allTrainImages.shape[0] // batchSize
numTraining = int(batchSize * numBatches)

# Summary of the training setup
print(
    f"Number of epochs: {epochs}",
    f"Batch size: {batchSize}",
    f"Total number of train images: {numTraining}",
    f"Total number of batches: {numBatches}",
    sep="\n",
)

# Group images and labels by batch: the leading axis indexes the batch,
# the trailing two image axes are kept unchanged
trainImages = allTrainImages[:numTraining].reshape(
    numBatches, -1, *allTrainImages.shape[-2:]
)
trainLabels = allTrainLabels[:numTraining].reshape(numBatches, -1)

## Training using each algorithm

In [None]:
# The batching depends only on the data and on batchSize, not on the
# optimizer, so it is computed once for all runs rather than once per
# algorithm. Only complete batches are kept.
numBatches = allTrainImages.shape[0] // batchSize
numTraining = numBatches * batchSize

trainImages = allTrainImages[:numTraining].reshape(
    numBatches, -1, allTrainImages.shape[-2], allTrainImages.shape[-1]
)
trainLabels = allTrainLabels[:numTraining].reshape(numBatches, -1)

for algorithm, config, name in configurations:
    print(f"Started training using {name} algorithm")

    # A new network per optimizer, so the runs do not share trained weights
    model = Network()
    optimizer = algorithm(model.parameters(), **config)

    # One loss value per optimization step, across all epochs
    lossTrack = np.zeros(epochs * numBatches)
    step = 0

    # perf_counter is a monotonic clock intended for measuring intervals
    startTime = time.perf_counter()
    # Honour the configured number of epochs; every epoch visits the same
    # batches in the same order (no shuffling)
    for epoch in range(epochs):
        for images, labels in zip(trainImages, trainLabels):
            # Flatten every image of the batch into a single vector
            images = images.reshape(images.shape[0], -1)
            optimizer.zeroGrad()
            out = model(images)
            loss = criterion(out, labels)
            loss.backward()
            optimizer.step()
            lossTrack[step] = loss.item()
            step += 1
    endTime = time.perf_counter()

    print(f"Finished training in {endTime - startTime:.2f}s\n")
    # Persist the loss curve; it is read back from this file for plotting
    np.savetxt(f"lossTrack_{name}", lossTrack)

In [None]:
# Read each saved loss curve back from disk and pair it with the batch size
# and the algorithm's display name (last element of every configuration)
plots = [
    (np.loadtxt(f"lossTrack_{entry[2]}"), batchSize, entry[2])
    for entry in configurations
]

plotLossTrack(plots)