# Load the data set

In [28]:
from torchvision import datasets
from torchvision.transforms import ToTensor

# Training split of MNIST, stored under the local 'data' directory.
# Each image is converted with ToTensor() when it is read from the dataset.
train_data = datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=ToTensor(),
)

# Held-out test split, configured identically apart from train=False.
test_data = datasets.MNIST(
    root='data',
    train=False,
    download=True,
    transform=ToTensor(),
)

# Create data loaders

In [29]:
from torch.utils.data import DataLoader

# Number of samples each loader yields per iteration.
batch_size = 64

# Only the training loader shuffles; the test loader is created with shuffle=False.
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = DataLoader(
    dataset=test_data,
    batch_size=batch_size,
    shuffle=False,
)

# Convert target labels to one-hot probability vectors

In [30]:
def make_probability(num_labels, label):
    """Build a one-hot list of length ``num_labels`` for ``label``.

    Args:
        num_labels: Length of the returned list.
        label: Class index to mark; compared with ``==`` against each
            position ``0 .. num_labels - 1``.

    Returns:
        A list of ``num_labels`` ints holding 1 at every position equal to
        ``label`` and 0 elsewhere. If ``label`` matches no position the list
        is all zeros.
    """
    one_hot = []
    for position in range(num_labels):
        one_hot.append(1 if position == label else 0)
    return one_hot

# Number of target classes; also the length of every vector built by make_probability.
num_labels = 10

# Initialize the model

In [31]:
from neural_network.neural_network import NeuralNetwork
from neural_network.functions.loss_functions import CrossEntropy
from neural_network.functions.activation_functions import ReLU, Softmax

# Sizes handed to NeuralNetwork: four entries, paired with three activations below.
# NOTE(review): presumably input width first (784, likely a flattened 28x28 image)
# through to 10 outputs — confirm against NeuralNetwork's constructor.
hidden_layer_sizes = [784, 128, 64, 10]
hidden_layer_activations = [ReLU(), ReLU(), Softmax()]
loss_function = CrossEntropy()

model = NeuralNetwork(
    hidden_layer_sizes,
    hidden_layer_activations,
    loss_function,
)

# Hyperparameters passed to model.apply_gradients in the training loop.
learning_rate = 0.01
regularization_rate = 0.01
momentum = 0.9

# Train the model

In [32]:
import torch
import numpy as np


def predict(y_pred):
    """Return the index of the largest entry in ``y_pred``.

    Args:
        y_pred: Array-like of scores, passed straight to ``np.argmax``.

    Returns:
        The result of ``np.argmax(y_pred)`` with its default arguments.
    """
    best_index = np.argmax(y_pred)
    return best_index


def my_flatten(data: torch.Tensor) -> list[float]:
    """Flatten ``data`` to one dimension and return it as a plain Python list.

    Args:
        data: Tensor of any shape.

    Returns:
        The elements of ``data.flatten()`` converted with ``tolist()``.
    """
    flat = data.flatten()
    return flat.tolist()

In [33]:
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

# Start from a freshly reset model so re-running this cell does not continue
# from the state left by a previous run.
model.reset()

# Per-epoch training metrics, consumed by the plotting cell.
loss_values = []
accuracy_values = []

num_epochs = 10
num_train_samples = len(train_dataloader.dataset)

for epoch in tqdm(range(num_epochs)):
    print(f'Epoch: {epoch + 1}/{num_epochs}\n--------------')

    train_loss = 0
    train_accuracy = 0
    for batch, (images, labels) in enumerate(train_dataloader):
        # Convert the torch batch into numpy: one flattened row per image and
        # one one-hot row per label.
        inputs = np.array([my_flatten(image) for image in images])
        targets = np.array([make_probability(num_labels, label) for label in labels])

        # The model is updated after every single sample, so the DataLoader
        # batch only groups samples here. Distinct loop names keep the batch
        # arrays from being rebound while they are iterated.
        for sample, target in zip(inputs, targets):
            y_pred = model.forward(sample)

            train_loss += model.get_loss(target, y_pred)

            model.backward(target)
            model.apply_gradients(learning_rate, regularization_rate, momentum)

            # A prediction is correct when its arg-max matches the one-hot target's.
            train_accuracy += int(predict(y_pred) == np.argmax(target))

        if batch % 400 == 0:
            # Use the configured batch size rather than the current batch's
            # length, which would be wrong for a short final batch.
            samples_before = batch * train_dataloader.batch_size
            print(f'Looked at {samples_before}/{num_train_samples} samples')

    # Turn the accumulated sums into per-sample averages for this epoch.
    train_loss /= num_train_samples
    train_accuracy /= num_train_samples

    loss_values.append(train_loss)
    accuracy_values.append(train_accuracy)

    print(f'Train Loss: {train_loss}, Train Accuracy: {train_accuracy}')

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1/10
--------------
Looked at 0/60000 samples
Looked at 25600/60000 samples


KeyboardInterrupt: 

In [None]:
# Side-by-side panels: per-epoch loss on the left, per-epoch accuracy on the right.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))

panels = (
    (ax[0], loss_values, "Loss"),
    (ax[1], accuracy_values, "Accuracy"),
)
for panel, history, y_label in panels:
    panel.plot(history)
    panel.set_xlabel("Epoch")
    panel.set_ylabel(y_label)

plt.tight_layout()
plt.show()