In [None]:
import torch
import torchvision.datasets as datasets # for MNIST
import torchvision.transforms as transforms # Transformations we can perform on our dataset for augmentation
from torch import optim # For optimizers like SGD, Adam, etc.
from torch import nn # To inherit our neural network
from torch.utils.data import DataLoader # For management of the dataset (batches)
from tqdm import tqdm # For nice progress bar!
from torch.optim.lr_scheduler import StepLR
from torch.optim.lr_scheduler import CosineAnnealingLR
import numpy as np


In [None]:
import flappy_bird_gymnasium
import gymnasium

In [41]:
# Pick the compute backend in order of preference: CUDA first, then MPS,
# and finally the CPU when neither accelerator reports as available.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print("Device: ", device)

Device:  mps


In [42]:
# Set hyperparameters

# Numeric precision (reported in the summary cell).
data_type = torch.float32

# Network dimensions: flattened input width and number of output classes.
input_size, num_classes = 784, 10

# Optimisation settings: passes over the training set, samples per batch,
# and the initial learning rate handed to the optimizer.
epochs, batch_size = 15, 64
learning_rate = 0.0012

In [43]:
# Load Data

# Training-time augmentation: a small random rotation and a random shift,
# followed by conversion of the image to a tensor.
train_transforms = transforms.Compose([
    transforms.RandomRotation(10),     # Rotate +/- 10 degrees
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)), # Shift image by up to 10%
    transforms.ToTensor()
])

# Apply these transforms ONLY to the training set; test images are only
# converted to tensors so evaluation sees un-augmented data.
train_dataset = datasets.MNIST(root="dataset/", train=True, transform=train_transforms, download=True)
test_dataset = datasets.MNIST(root="dataset/", train=False, transform=transforms.ToTensor(), download=True)

# Training batches are reshuffled every epoch. The test loader keeps the
# dataset order: the accuracy count does not depend on sample order, and a
# fixed order makes evaluation runs repeatable.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [44]:
# Training criterion applied to the raw model outputs and the integer labels.
loss_function = nn.CrossEntropyLoss()

# Stand-alone activation modules; softmax normalises over dim 1.
softmax = nn.Softmax(dim=1)
relu = nn.ReLU()

def init_weights(m):
    """Initialise a single module in place; intended for ``Module.apply``.

    Only ``nn.Linear`` modules are touched: their weight matrix is filled with
    Xavier-uniform values and their bias (when present) is set to zero. Every
    other module type is left unchanged.

    Args:
        m: The module visited by ``apply``.
    """
    if not isinstance(m, nn.Linear):
        return

    nn.init.xavier_uniform_(m.weight)
    # Linear layers built with bias=False expose `bias` as None; zeroing None
    # would raise, so the bias is only reset when it exists.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

In [45]:
# Fully connected classifier. The input and output widths come from the
# hyperparameter cell (`input_size`, `num_classes`) so the architecture stays
# in sync with the values reported in the summary.
layers = nn.Sequential(
    nn.Flatten(), # Ensures input is flattened automatically

    # Layer 1: Wide expansion
    nn.Linear(input_size, 512),
    nn.BatchNorm1d(512), # normalization keeps values stable
    nn.ReLU(),
    #nn.Dropout(0.2),     # prevents memorization (currently disabled)

    # Layer 2: Halve the width
    nn.Linear(512, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    #nn.Dropout(0.2),     # currently disabled

    # Layer 3: Output (one raw score per class)
    nn.Linear(256, num_classes)
)

# Re-initialise every submodule with `init_weights`; `apply` returns the
# module, so the cell also displays the resulting architecture.
layers.apply(init_weights)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): ReLU()
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Linear(in_features=256, out_features=10, bias=True)
)

In [46]:
class NN(nn.Module):
    """Thin ``nn.Module`` wrapper around a stack of layers.

    Args:
        network: Optional module to wrap. When omitted, the notebook-level
            ``layers`` object is used, which keeps ``NN()`` working exactly as
            before. Note that in that case every ``NN()`` instance wraps the
            same ``layers`` object and therefore shares its parameters.
    """

    def __init__(self, network=None):
        super().__init__()
        # Only fall back to the global `layers` when no module was supplied,
        # so the class can also be built without relying on notebook state.
        self.layers = layers if network is None else network

    def forward(self, input):
        """Pass ``input`` through the wrapped layers and return the result."""
        return self.layers(input)

In [47]:
# Build the network and place its parameters on the selected device.
model = NN().to(device)

# Adam over all model parameters, with a small weight-decay term.
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=1e-4,
)

# Cosine learning-rate schedule spanning `epochs` scheduler steps, bottoming
# out at eta_min.
scheduler = CosineAnnealingLR(
    optimizer,
    T_max=epochs,
    eta_min=1e-6,
)


In [48]:
# Print model summary
print(model)

# Print hyperparameters (one per line, emitted by a single print call)
print(
    f"Data type: {data_type}",
    f"Input size: {input_size}",
    f"Epochs: {epochs}",
    f"Batch size: {batch_size}",
    f"Learning rate: {learning_rate}",
    f"Number of classes: {num_classes}",
    f"Device: {device}",
    f"Loss function: {loss_function}",
    f"Optimizer: {optimizer}",
    sep="\n",
)

# Show the first parameter tensor of the model as a quick sanity check.
print(next(model.parameters()))

def check_accuracy(loader, model):
    """Print how many samples yielded by ``loader`` the model classifies correctly.

    The model is switched to eval mode for the duration of the pass and is put
    back into whatever mode (train or eval) it was in when the call started,
    even if evaluation raises.

    NOTE: a later cell in this notebook defines ``check_accuracy`` again; once
    that cell has run, it replaces this definition.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches.
        model: Module mapping a ``(batch, features)`` tensor to per-class scores.

    Returns:
        None. The result is reported with ``print``.
    """
    num_correct = 0
    num_samples = 0

    # Evaluate on the device that actually holds the model's weights; a model
    # without parameters is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device)
                y = y.to(device=target_device)
                # Flatten everything except the batch dimension.
                x = x.reshape(x.shape[0], -1)

                scores = model(x)
                # Softmax is monotonic, so the arg-max of the raw scores is
                # already the predicted class.
                predictions = scores.argmax(dim=1)
                num_correct += int((predictions == y).sum())
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of forcing train mode.
        model.train(was_training)

    if num_samples == 0:
        # Nothing was evaluated; avoid dividing by zero.
        print("Got 0 / 0 correct (no samples in loader)")
        return

    print(f"Got {num_correct} / {num_samples} correct ({float(num_correct)/float(num_samples)*100:.2f}%)")


NN(
  (layers): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=512, bias=True)
    (2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=256, bias=True)
    (5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Linear(in_features=256, out_features=10, bias=True)
  )
)
Data type: torch.float32
Input size: 784
Epochs: 15
Batch size: 64
Learning rate: 0.0012
Number of classes: 10
Device: mps
Loss function: CrossEntropyLoss()
Optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    decoupled_weight_decay: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    initial_lr: 0.0012
    lr: 0.0012
    maximize: False
    weight_decay: 0.0001
)
Parameter containing:
tensor([[-0.0601, -0.0344, -0.0022,  ...,  0.0513, -0

In [None]:
for epoch in range(epochs):
    # Sum of the per-batch losses; divided by the batch count when reported.
    total_loss = 0.0

    for data, targets in tqdm(train_loader):
        # Move the batch to the selected device (cuda/mps when available)
        data = data.to(device)
        targets = targets.to(device)

        # Collapse each image into a single feature vector
        data = torch.flatten(data, start_dim=1)

        # Clear gradients left over from the previous step, then run the
        # forward pass and compute the loss
        optimizer.zero_grad()
        scores = model(data)
        loss = loss_function(scores, targets)

        # Backward pass followed by the parameter update
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader)}")
    # Advance the learning-rate schedule once per epoch
    scheduler.step()

100%|██████████| 938/938 [00:07<00:00, 117.92it/s]


Epoch 1/15, Loss: 0.3676938540788729
Accuracy on training set:
Got 56480 / 60000 correct (94.13%)
Accuracy on test set:
Got 9688 / 10000 correct (96.88%)


100%|██████████| 938/938 [00:08<00:00, 117.00it/s]


Epoch 2/15, Loss: 0.19472320911202476
Accuracy on training set:
Got 57388 / 60000 correct (95.65%)
Accuracy on test set:
Got 9736 / 10000 correct (97.36%)


100%|██████████| 938/938 [00:08<00:00, 108.93it/s]


Epoch 3/15, Loss: 0.16088385077546846
Accuracy on training set:
Got 57669 / 60000 correct (96.11%)
Accuracy on test set:
Got 9775 / 10000 correct (97.75%)


100%|██████████| 938/938 [00:08<00:00, 110.38it/s]


Epoch 4/15, Loss: 0.1397021505802965
Accuracy on training set:
Got 57967 / 60000 correct (96.61%)
Accuracy on test set:
Got 9815 / 10000 correct (98.15%)


100%|██████████| 938/938 [00:08<00:00, 112.45it/s]


Epoch 5/15, Loss: 0.1270073192722317
Accuracy on training set:
Got 57999 / 60000 correct (96.67%)
Accuracy on test set:
Got 9818 / 10000 correct (98.18%)


100%|██████████| 938/938 [00:07<00:00, 123.93it/s]


Epoch 6/15, Loss: 0.11531567542370894
Accuracy on training set:
Got 58131 / 60000 correct (96.89%)
Accuracy on test set:
Got 9843 / 10000 correct (98.43%)


100%|██████████| 938/938 [00:08<00:00, 111.97it/s]


Epoch 7/15, Loss: 0.1057416654076737
Accuracy on training set:
Got 58516 / 60000 correct (97.53%)
Accuracy on test set:
Got 9861 / 10000 correct (98.61%)


100%|██████████| 938/938 [00:09<00:00, 98.23it/s] 


Epoch 8/15, Loss: 0.09413813754294648
Accuracy on training set:
Got 58625 / 60000 correct (97.71%)
Accuracy on test set:
Got 9841 / 10000 correct (98.41%)


100%|██████████| 938/938 [00:08<00:00, 109.39it/s]


Epoch 9/15, Loss: 0.08455694809522249
Accuracy on training set:
Got 58809 / 60000 correct (98.02%)
Accuracy on test set:
Got 9886 / 10000 correct (98.86%)


100%|██████████| 938/938 [00:08<00:00, 106.48it/s]


Epoch 10/15, Loss: 0.07755342794598134
Accuracy on training set:
Got 58953 / 60000 correct (98.26%)
Accuracy on test set:
Got 9889 / 10000 correct (98.89%)


100%|██████████| 938/938 [00:08<00:00, 106.03it/s]


Epoch 11/15, Loss: 0.06942413114398512
Accuracy on training set:
Got 59029 / 60000 correct (98.38%)
Accuracy on test set:
Got 9896 / 10000 correct (98.96%)


100%|██████████| 938/938 [00:09<00:00, 102.61it/s]


Epoch 12/15, Loss: 0.06460328869172123
Accuracy on training set:
Got 59110 / 60000 correct (98.52%)
Accuracy on test set:
Got 9901 / 10000 correct (99.01%)


100%|██████████| 938/938 [00:09<00:00, 103.41it/s]


Epoch 13/15, Loss: 0.05756620101621752
Accuracy on training set:
Got 59208 / 60000 correct (98.68%)
Accuracy on test set:
Got 9908 / 10000 correct (99.08%)


100%|██████████| 938/938 [00:09<00:00, 96.74it/s] 


Epoch 14/15, Loss: 0.056097011828769264
Accuracy on training set:
Got 59216 / 60000 correct (98.69%)
Accuracy on test set:
Got 9909 / 10000 correct (99.09%)


100%|██████████| 938/938 [00:08<00:00, 107.49it/s]


Epoch 15/15, Loss: 0.051602877049482486
Accuracy on training set:
Got 59258 / 60000 correct (98.76%)
Accuracy on test set:
Got 9920 / 10000 correct (99.20%)


In [50]:
def check_accuracy(loader, model):
    """Print how many samples yielded by ``loader`` the model classifies correctly.

    The model is switched to eval mode for the duration of the pass and is put
    back into whatever mode (train or eval) it was in when the call started,
    even if evaluation raises.

    NOTE: an earlier cell in this notebook also defines ``check_accuracy``;
    running this cell replaces that earlier definition.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches.
        model: Module mapping a ``(batch, features)`` tensor to per-class scores.

    Returns:
        None. The result is reported with ``print``.
    """
    num_correct = 0
    num_samples = 0

    # Evaluate on the device that actually holds the model's weights; a model
    # without parameters is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device)
                y = y.to(device=target_device)
                # Flatten everything except the batch dimension.
                x = x.reshape(x.shape[0], -1)

                scores = model(x)
                # Softmax is monotonic, so the arg-max of the raw scores is
                # already the predicted class.
                predictions = torch.argmax(scores, dim=1)
                num_correct += int((predictions == y).sum())
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of forcing train mode.
        model.train(was_training)

    if num_samples == 0:
        # Nothing was evaluated; avoid dividing by zero.
        print("Got 0 / 0 correct (no samples in loader)")
        return

    print(f"Got {num_correct} / {num_samples} correct ({float(num_correct)/float(num_samples)*100:.2f}%)")

In [51]:
# print accuracy
# The training loader applies the random augmentations from `train_transforms`,
# so the training figure is measured on augmented images.
for heading, loader in (
    ("Accuracy on training set:", train_loader),
    ("Accuracy on test set:", test_loader),
):
    print(heading)
    check_accuracy(loader, model)

Accuracy on training set:
Got 59295 / 60000 correct (98.83%)
Accuracy on test set:
Got 9920 / 10000 correct (99.20%)
