# CMPSC 445 - M7 Assignment

### Loading Data
MNIST Dataset

In [17]:
from torchvision import datasets, transforms

# defining preprocessing transformations on mnist dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# loading mnist training data
mnist_training_data = datasets.MNIST(
    './data/mnist', 
    train=True, 
    download=True,
    transform=transform
)

# loading mnist test data
mnist_testing_data = datasets.MNIST(
    './data/mnist', 
    train=False, 
    transform=transform
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1000)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1000)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting ./data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1000)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1000)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/mnist/MNIST/raw






### Define Model

Involves:
- Initialization
- Forward Propagation
- Training Function
- Testing Function
- Model Training Function

In [15]:
import torch
from torch import nn as nn

class NeuralNet(nn.Module):
    """  
    Neural network model for classification tasks.

    This model consists of two hidden layers with ReLU activation 
    and an output layer that uses log softmax for multi-class classification.

    Attributes:
        fc1 (nn.Linear): The first fully connected hidden layer (input: 784, output: 128).
        fc2 (nn.Linear): The second fully connected hidden layer (input: 128, output: 64).
        final_layer (nn.Linear): The final layer for output (input: 64, output: 10).
    
    """
    def __init__(self):
        super().__init__()

        # hidden layers with ReLU activations
        # first fully connected hidden layer (input: 784, output: 128)
        self.fc1 = nn.Linear(28*28, 128)
        # second fully connected hidden layer (input: 128, output: 64)
        self.fc2 = nn.Linear(128, 64)
        # final layer (output: 10 for classification, assuming 10 classes)
        self.final_layer = nn.Linear(64, 10)

    def forward(self, x):
        # flatten input image (assuming input is of shape [batch_size, 1, 28, 28])
        x = x.view(-1, 28*28)

        # pass through hidden layers with ReLU activations
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))

        # pass through final layer
        output = self.final_layer(x)
        # applying log softmax to the output (for NLL loss)
        output = nn.functional.log_softmax(output, dim=1)

        return output
    
    def training(model, train_dataloader, optimizer, print_freq=10):
        model.train()   # set model to training mode

        train_loss = 0

        for batch_index, (data, target) in enumerate(train_dataloader):
            optimizer.zero_grad()   # zero the gradients
            output = model(data)    # forward pass through the model

            # calculate loss using negative log-likelihood
            loss = nn.functional.nll_loss(output, target)

            # backpropagation
            loss.backward() 

            # update model parameters
            optimizer.step()

            # sum up loss
            train_loss += loss.item() * data.shape[0]

            # print current training loss at specified intervals
            if not (batch_index % print_freq):
                print(
                    f"Train Batch {batch_index}/{len(train_dataloader)} Loss: {loss.item():.4f}"
                )

        # return average training loss
        return train_loss / len(train_dataloader.dataset)
    
    def testing(model, test_dataloader):
        model.eval()    # set model to training mode

        test_loss = 0
        correct = 0

        # no need to compute gradients during evaluation
        with torch.no_grad():
            for data, target in test_dataloader:
                # forward pass through the model
                output = model(data)

                # calculate loss
                test_loss += nn.functional.nll_loss(output, target, reduction='sum').item()

                # get predictions by taking argmax of the output (the class with the highest score)
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        # average loss over test dataset
        test_loss /= len(test_dataloader.dataset)

        # calculate accuracy
        test_accuracy = correct / len(test_dataloader.dataset)

        return test_loss, test_accuracy
    
    def train_model(model, train_dataloader, test_dataloader, optimizer, num_epochs):
        for i in range(num_epochs):
            # train model for one epoch
            train_loss = NeuralNet.training(model, train_dataloader, optimizer)

            # test model on validation set
            test_loss, test_accuracy = NeuralNet.testing(model, test_dataloader)

            # print stats
            print(
                f"Epoch: {i+1} | Train Loss: {train_loss:.5f} |",
                f"Test Loss: {test_loss:.5f} | Test Accuracy: {test_accuracy:.5f}"
            )

# model = NeuralNet()
# print(f"Model: {model}")
# print(f"Parameter Sum: {sum([torch.prod(torch.tensor(i.shape)) for i in model.parameters()])}")

### Training Model

Results Summary:
- Model improved steadily, with both training and test losses decreasing and test accuracy rising consistently.
The model is generalizing well to the test data, evidenced by a strong performance on the test set.
- The absence of signs of overfitting (i.e., test accuracy keeps improving without a significant rise in test loss) suggested a well-balanced model.
- The results indicated that the model would be on track and likely to perform well in real-world scenarios.

Model Output Training Statistics:
| Epoch | Train Loss | Test Loss | Test Accuracy |
| :----: | :---------: | :-------- | :------------: |
| 1     | 0.32063    | 0.15165   | 0.95537       |
| 2     | 0.13151    | 0.09308   | 0.97242       |
| 3     | 0.09001    | 0.06999   | 0.97922       |
| 4     | 0.06998    | 0.05546   | 0.98257       |
| 5     | 0.05499    | 0.03792   | 0.98860       |
| 6     | 0.04414    | 0.03082   | 0.99087       |
| 7     | 0.03761    | 0.02451   | 0.99242       |
| 8     | 0.02950    | 0.02248   | 0.99293       |
| 9     | 0.02404    | 0.01487   | 0.99523       |
| 10    | 0.02402    | 0.02228   | 0.99182       |

In [None]:
# dataloader for training data
train_dataloader = torch.utils.data.DataLoader(
    mnist_training_data,
    batch_size = 128, 
    shuffle = True
)

# dataloader for testing data
test_dataloader = torch.utils.data.DataLoader(
    mnist_training_data,
    batch_size = 128, 
    shuffle = False
)

# running model train function
model = NeuralNet()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

NeuralNet.train_model(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    num_epochs=10
)

### CIFAR-10 Bonus

In [None]:
# defining preprocessing transformations on cifar-10 dataset
transform2 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# loading cifar-10 training data
cifar10_training_data = datasets.MNIST(
    './data/cifar10', 
    train=True, 
    download=True,
    transform=transform
)

# loading cifar-10 test data
cifar10_testing_data = datasets.MNIST(
    './data/cifar10', 
    train=False, 
    transform=transform
)``