## Question 2







In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

# A modified version of LeNet5 with modern tweaks
class ModifiedLeNet5(nn.Module):
    def __init__(self):
        super(ModifiedLeNet5, self).__init__()
        # Input is 32x32
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5, stride=1)
        # Max pool replaces average pool, kernel_size=2
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # After second conv and pool, we flatten and go to FC layers

        # C5 and F6 replaced with standard fc layers:
        # After conv layers:
        # Input: 32x32 -> conv1(6@28x28) -> pool -> 6@14x14
        # conv2 -> 16@10x10 -> pool -> 16@5x5
        # Flatten: 16*5*5 = 400
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        # Add dropout for better generalization
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)

        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)  # We will apply softmax in the loss function, so no final activation
        return x

In [2]:

import torch.optim as optim
from PIL import Image
import io
from torch.utils.data import TensorDataset, DataLoader
from torchvision import transforms
import torch.nn.functional as TorchFunc

import pandas as pd
from PIL import Image



splits = {'train': 'mnist/train-00000-of-00001.parquet', 'test': 'mnist/test-00000-of-00001.parquet'}
df_train = pd.read_parquet("hf://datasets/ylecun/mnist/" + splits["train"])
df_test = pd.read_parquet("hf://datasets/ylecun/mnist/" + splits["test"])


# Data augmentation transforms
# Here we add random rotations, shifts, and slight scaling
train_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.RandomAffine(degrees=15, translate=(0.1,0.1), scale=(0.9,1.1)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # Normalizing MNIST
])

test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Assuming df_train and df_test are loaded as in your original code
# Modify your get_images function to return PIL images directly, then apply transforms
# Or simply adapt to run transforms here.
from PIL import Image
import io

def get_dataset(df, transform):
    images = []
    labels = []
    for row in df.itertuples():
        img = Image.open(io.BytesIO(row[1]['bytes'])).convert('L')  # Ensure grayscale
        img = transform(img)
        images.append(img)
        labels.append(row[2])

    images = torch.stack(images)
    labels = torch.tensor(labels, dtype=torch.long)
    dataset = TensorDataset(images, labels)
    return dataset

train_dataset = get_dataset(df_train, train_transform)
test_dataset = get_dataset(df_test, test_transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize model, loss, optimizer
model = ModifiedLeNet5().to('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train(model, loader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    total_correct = 0
    total_samples = 0
    device = next(model.parameters()).device

    for data, targets in loader:
        data, targets = data.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * data.size(0)
        _, predicted = outputs.max(1)
        total_correct += predicted.eq(targets).sum().item()
        total_samples += data.size(0)

    avg_loss = running_loss / total_samples
    accuracy = total_correct / total_samples
    return avg_loss, accuracy

def test(model, loader, criterion):
    model.eval()
    running_loss = 0.0
    total_correct = 0
    total_samples = 0
    device = next(model.parameters()).device

    with torch.no_grad():
        for data, targets in loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            loss = criterion(outputs, targets)

            running_loss += loss.item() * data.size(0)
            _, predicted = outputs.max(1)
            total_correct += predicted.eq(targets).sum().item()
            total_samples += data.size(0)

    avg_loss = running_loss / total_samples
    accuracy = total_correct / total_samples
    return avg_loss, accuracy

# Training loop
epochs = 20
for epoch in range(1, epochs + 1):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    test_loss, test_acc = test(model, test_loader, criterion)
    print(f"Epoch {epoch}/{epochs}, Train Loss: {train_loss:.4f}, "
          f"Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Epoch 1/20, Train Loss: 0.7391, Train Acc: 0.7611, Test Acc: 0.9727
Epoch 2/20, Train Loss: 0.3086, Train Acc: 0.9125, Test Acc: 0.9823
Epoch 3/20, Train Loss: 0.2346, Train Acc: 0.9345, Test Acc: 0.9872
Epoch 4/20, Train Loss: 0.1959, Train Acc: 0.9450, Test Acc: 0.9879
Epoch 5/20, Train Loss: 0.1750, Train Acc: 0.9510, Test Acc: 0.9879
Epoch 6/20, Train Loss: 0.1561, Train Acc: 0.9574, Test Acc: 0.9885
Epoch 7/20, Train Loss: 0.1439, Train Acc: 0.9600, Test Acc: 0.9901
Epoch 8/20, Train Loss: 0.1351, Train Acc: 0.9611, Test Acc: 0.9869
Epoch 9/20, Train Loss: 0.1251, Train Acc: 0.9651, Test Acc: 0.9912
Epoch 10/20, Train Loss: 0.1195, Train Acc: 0.9664, Test Acc: 0.9919
Epoch 11/20, Train Loss: 0.1117, Train Acc: 0.9680, Test Acc: 0.9904
Epoch 12/20, Train Loss: 0.1078, Train Acc: 0.9691, Test Acc: 0.9912
Epoch 13/20, Train Loss: 0.1034, Train Acc: 0.9694, Test Acc: 0.9908
Epoch 14/20, Train Loss: 0.0982, Train Acc: 0.9720, Test Acc: 0.9922
Epoch 15/20, Train Loss: 0.0941, Train Acc:

In [3]:
from google.colab import drive
import os

# Mount Google Drive
drive.mount('/content/drive')

# Define the save path in Google Drive
save_path = "/content/drive/MyDrive/MLHW4/LeNet_2.pth"

# Save the model's state dictionary
torch.save(model.state_dict(), save_path)

print(f"Model saved to {save_path}")


Mounted at /content/drive
Model saved to /content/drive/MyDrive/MLHW4/LeNet_2.pth
