# Assignment 4 (15 points) - Ioana-Delia Blendea

In [1]:
from torch.utils.data import Dataset
import pickle
import os
import pandas as pd
import numpy as np

class ExtendedMNISTDataset(Dataset):
    """Dataset wrapper around one pickled Extended MNIST split.

    The constructor unpickles the whole split file into ``self.data``;
    indexing and ``len`` are forwarded to that object unchanged.

    Args:
        root: Directory that contains the ``extended_mnist_*.pkl`` files.
        train: Load ``extended_mnist_train.pkl`` when true, otherwise
            ``extended_mnist_test.pkl``.
    """

    def __init__(self, root: str = "/kaggle/input/fii-nn-2025-homework-4", train: bool = True):
        split_name = "extended_mnist_train.pkl" if train else "extended_mnist_test.pkl"
        split_path = os.path.join(root, split_name)

        # NOTE: unpickling can execute arbitrary code, so only point this at
        # files from a trusted source.
        with open(split_path, "rb") as handle:
            self.data = pickle.load(handle)

    def __len__(self) -> int:
        """Return the number of stored samples."""
        return len(self.data)

    def __getitem__(self, i: int):
        """Return the ``i``-th stored sample exactly as it was pickled."""
        return self.data[i]

# Flatten every training image and keep its label in a parallel list.
train_data, train_labels = [], []
for sample_image, sample_label in ExtendedMNISTDataset(train = True):
    train_data.append(sample_image.flatten())
    train_labels.append(sample_label)

# For the test split only the flattened images are kept; the second element
# of each sample is ignored.
test_data = [
    sample_image.flatten()
    for sample_image, _ in ExtendedMNISTDataset(train = False)
]

print("all good :D")

all good :D


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.transforms import v2
from sklearn.model_selection import train_test_split

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"motorul pytorch pornit pe: {device}")

# Scale pixel values by 1/255 and store features as float32, labels as int64.
PIXEL_MAX = 255.0
X_all = np.array(train_data, dtype = np.float32) / PIXEL_MAX
y_all = np.array(train_labels, dtype = np.int64)
X_sub = np.array(test_data, dtype = np.float32) / PIXEL_MAX

# Number of output classes is derived from the largest label present.
NUM_CLASSES = int(y_all.max()) + 1

# Hold out 10% of the labelled data for validation (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(
    X_all,
    y_all,
    test_size = 0.1,
    random_state = 42,
)

# Keep every split resident on the compute device for the whole run.
X_train_gpu, y_train_gpu, X_val_gpu, y_val_gpu, X_sub_gpu = (
    torch.tensor(host_array, device = device)
    for host_array in (X_train, y_train, X_val, y_val, X_sub)
)

# Random rotation followed by a random shift/zoom, used for training and TTA.
augment_transform = v2.Compose(
    [
        v2.RandomRotation(degrees = 10),
        v2.RandomAffine(degrees = 0, translate = (0.08, 0.08), scale = (0.95, 1.05)),
    ]
)

def apply_augmentation(batch_images, image_shape = (1, 28, 28)):
    """Run ``augment_transform`` over a batch of flattened images.

    Args:
        batch_images: Tensor whose first dimension is the batch; the
            remaining elements of each sample must reshape to ``image_shape``.
        image_shape: ``(channels, height, width)`` each sample is reshaped to
            before the transform. Defaults to the previously hard-coded
            ``(1, 28, 28)``.

    Returns:
        The transformed batch flattened back to ``(batch, features)``.

    NOTE(review): the whole batch is passed through ``augment_transform`` in
    a single call. According to the torchvision documentation, random
    transforms sample one set of parameters per call, so every image in the
    batch presumably receives the same rotation/shift -- confirm this is
    intended.
    """
    B = batch_images.shape[0]
    # reshape (not view) so non-contiguous inputs are accepted as well.
    img_reshaped = batch_images.reshape(B, *image_shape)
    img_aug = augment_transform(img_reshaped)
    # flatten(start_dim=1) replaces view(B, -1): it does not require a
    # contiguous result and does not need to infer -1, which is impossible
    # for a zero-element tensor.
    return img_aug.flatten(start_dim = 1)

class TurboModel(nn.Module):
    """Fully connected classifier: 784 -> 1024 -> 512 -> 256 -> num_classes.

    Every hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout; the last
    Linear layer produces the raw class scores.
    """

    def __init__(self, num_classes):
        super().__init__()

        # (output width, dropout probability) of each hidden stage, in order.
        hidden_stages = ((1024, 0.3), (512, 0.3), (256, 0.2))

        stack = []
        in_features = 784
        for width, drop_p in hidden_stages:
            stack += [
                nn.Linear(in_features, width),
                nn.BatchNorm1d(width),
                nn.ReLU(),
                nn.Dropout(drop_p),
            ]
            in_features = width
        stack.append(nn.Linear(in_features, num_classes))

        # One flat Sequential named ``layers`` so parameter names stay
        # ``layers.<index>.*``.
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return the class scores for a batch of flattened inputs."""
        return self.layers(x)

# Build the classifier, then move its parameters and buffers to the device.
model = TurboModel(num_classes = NUM_CLASSES)
model = model.to(device)

motorul pytorch pornit pe: cuda


In [None]:
# ---- Training configuration ----
EPOCHS = 60
BATCH_SIZE = 512
num_samples = X_train_gpu.shape[0]
num_batches = int(np.ceil(num_samples / BATCH_SIZE))

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr = 0.001, weight_decay = 1e-4)
# The scheduler is stepped once per mini-batch, hence steps_per_epoch.
# NOTE(review): OneCycleLR drives the learning rate from max_lr, so the
# optimizer's lr = 0.001 presumably has no effect -- confirm.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr = 0.01,
    epochs = EPOCHS,
    steps_per_epoch = num_batches,
)

# ---- Training loop ----
for epoch in range(EPOCHS):
    model.train()

    # Draw a fresh sample order for this epoch.
    indices = torch.randperm(num_samples, device = device)
    X_shuffled = X_train_gpu[indices]
    y_shuffled = y_train_gpu[indices]

    # Stepping by BATCH_SIZE visits the same num_batches slices; a slice that
    # runs past the end is clipped, so the last batch may be smaller.
    for start_idx in range(0, num_samples, BATCH_SIZE):
        window = slice(start_idx, start_idx + BATCH_SIZE)
        x_batch = apply_augmentation(X_shuffled[window])
        y_batch = y_shuffled[window]

        optimizer.zero_grad()
        loss = criterion(model(x_batch), y_batch)
        loss.backward()
        optimizer.step()
        scheduler.step()

    # Validation accuracy is reported only on every fifth epoch.
    if (epoch + 1) % 5 != 0:
        continue

    model.eval()
    with torch.no_grad():
        val_logits = model(X_val_gpu)
        predicted = val_logits.max(dim = 1).indices
        num_correct = (predicted == y_val_gpu).sum().item()
    acc = num_correct / y_val_gpu.size(0)
    print(f"epoch {epoch+1} | acc: {acc*100:.2f}")

epoch 5 | acc: 96.55
epoch 10 | acc: 97.18
epoch 15 | acc: 97.87
epoch 20 | acc: 98.43
epoch 25 | acc: 98.70
epoch 30 | acc: 98.70
epoch 35 | acc: 99.00
epoch 40 | acc: 98.98


In [None]:
@torch.inference_mode()
def predict_with_tta(model, x_data, rounds = 10):
    """Predict class indices using test-time augmentation.

    The model is switched to eval mode, then its outputs on the unmodified
    input are summed with its outputs on ``rounds`` augmented copies of the
    input; the arg-max of that sum is the prediction.

    Args:
        model: Module to evaluate; left in eval mode afterwards.
        x_data: Batch of inputs accepted by both ``model`` and
            ``apply_augmentation``.
        rounds: Number of augmented passes added to the plain pass.

    Returns:
        NumPy array holding one predicted class index per input row.
    """
    model.eval()

    # Start from the prediction on the original, un-augmented inputs.
    logit_sum = model(x_data)
    for _ in range(rounds):
        logit_sum += model(apply_augmentation(x_data))

    best_class = logit_sum.argmax(dim=1)
    return best_class.cpu().numpy()
    
# Predict the submission set with TTA in a single pass; if that exhausts
# memory, redo the prediction in fixed-size chunks.
TTA_ROUNDS = 10
TTA_CHUNK_SIZE = 5000

needs_chunking = False
try:
    predictions = predict_with_tta(model, X_sub_gpu, rounds = TTA_ROUNDS)
except RuntimeError as err:
    # Only memory exhaustion is worth retrying with smaller inputs. Any other
    # RuntimeError would fail again in the chunked path, so surface it here
    # instead of masking it behind a second traceback.
    if "memory" not in str(err).lower():
        raise
    needs_chunking = True

if needs_chunking:
    # The retry runs outside the ``except`` block on purpose: while the
    # handler is active, the exception's traceback keeps the tensors of the
    # failed attempt alive, which can make the retry run out of memory too.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    # Concatenate so ``predictions`` is a NumPy array on both code paths.
    predictions = np.concatenate([
        predict_with_tta(model, X_sub_gpu[start:start + TTA_CHUNK_SIZE], rounds = TTA_ROUNDS)
        for start in range(0, len(X_sub_gpu), TTA_CHUNK_SIZE)
    ])

In [None]:
# Submission table: one row per prediction, with the row position as its ID.
predictions_csv = {
    "ID": list(range(len(predictions))),
    "target": list(predictions),
}

df = pd.DataFrame(predictions_csv)
df.to_csv("submission.csv", index=False)