In [1]:
from google.colab import files

# Ask for the two .mat files, then report the name and size of each upload.
print("Please upload 'train.mat' and 'test.mat'")
uploaded = files.upload()

# Walk name/content pairs directly instead of re-indexing the mapping by key.
for fn, content in uploaded.items():
    print(f'User uploaded file "{fn}" with length {len(content)} bytes')

Please upload 'train.mat' and 'test.mat'


Saving train.mat to train.mat
Saving test.mat to test.mat
User uploaded file "train.mat" with length 136210806 bytes
User uploaded file "test.mat" with length 217823927 bytes


In [2]:

import os
import numpy as np
import scipy.io as sio
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Tunable configuration for the whole notebook.
DATA_DIR = "./"  # folder containing train.mat & test.mat (the upload cell saves them into the working directory)
BATCH_SIZE = 64
NUM_CLASSES = 10
SEED = 42  # fixed seed so weight initialisation and batch shuffling repeat across runs

# Seed every RNG this notebook draws from before any model or loader exists;
# without this, two "Restart & Run All" passes give different losses/accuracies.
np.random.seed(SEED)
torch.manual_seed(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

# ----------------------------
# Preprocessing / Transforms
# Steps applied, in order, to every image:
#   1) resize to 64x64
#   2) convert to a tensor
#   3) normalize each channel with the given mean/std
_stl10_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.4467, 0.4398, 0.4066),
        std=(0.2241, 0.2215, 0.2239),
    ),
]
stl10_transform = transforms.Compose(_stl10_steps)

# ----------------------------
# Custom Dataset using train.mat / test.mat

class STL10MatDataset(Dataset):
    """Dataset backed by an STL-10 MATLAB file (``train.mat`` / ``test.mat``).

    The file must contain ``X`` with one flattened 96x96 RGB image per row
    (27648 values) and ``y`` with class labels in 1..10.

    Args:
        mat_path: path of the ``.mat`` file to load.
        transform: optional callable applied to the PIL image of each sample;
            when ``None`` the image is converted with ``transforms.ToTensor()``.

    Raises:
        ValueError: if ``X`` is not of shape (N, 27648) or ``y`` does not hold
            exactly one label per row of ``X``.
    """

    NUM_CHANNELS = 3
    IMAGE_SIZE = 96

    def __init__(self, mat_path, transform=None):
        data = sio.loadmat(mat_path)
        X = data["X"]                          # shape: (N, 27648)
        # reshape(-1) rather than squeeze(): a single-sample file must still
        # yield a 1-D label array, not a 0-d scalar.
        y = np.asarray(data["y"]).reshape(-1)  # shape: (N,)

        row_len = self.NUM_CHANNELS * self.IMAGE_SIZE * self.IMAGE_SIZE
        if X.ndim != 2 or X.shape[1] != row_len:
            raise ValueError(
                f"expected X of shape (N, {row_len}), got {X.shape}"
            )
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {y.shape[0]} labels"
            )

        # Convert labels from 1..10 to 0..9. Widen to int64 *before* the
        # subtraction so the shift is not done in the file's (possibly
        # unsigned, wrap-around) storage dtype.
        self.labels = y.astype(np.int64) - 1

        self.images = X
        self.transform = transform

    def __len__(self):
        """Number of samples (rows of ``X``)."""
        return self.images.shape[0]

    @staticmethod
    def _vector_to_hwc(vec):
        """Turn one flattened STL-10 row into an (H, W, C) uint8 array.

        STL-10 stores each image one channel at a time with every channel
        plane in column-major order, so a C-order reshape gives axes
        (C, W, H); swapping to (2, 1, 0) produces the upright (H, W, C) image.
        """
        planes = np.asarray(vec).reshape(3, 96, 96)          # (C, W, H)
        upright = planes.transpose(2, 1, 0)                  # (H, W, C)
        return np.ascontiguousarray(upright).astype(np.uint8)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for sample ``idx``."""
        pil_img = Image.fromarray(self._vector_to_hwc(self.images[idx]))

        if self.transform is not None:
            img_tensor = self.transform(pil_img)
        else:
            img_tensor = transforms.ToTensor()(pil_img)

        return img_tensor, self.labels[idx]

# ----------------------------
# Dataloaders

train_dataset = STL10MatDataset(os.path.join(DATA_DIR, "train.mat"),
                                transform=stl10_transform)
test_dataset = STL10MatDataset(os.path.join(DATA_DIR, "test.mat"),
                               transform=stl10_transform)

# Page-locked host memory only speeds up host->GPU copies. On a CPU-only
# runtime it buys nothing and DataLoader warns about it, so enable pinning
# only when batches will actually be moved to CUDA.
PIN_MEMORY = DEVICE.type == "cuda"

# Shuffle only the training split; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=2, pin_memory=PIN_MEMORY)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                         shuffle=False, num_workers=2, pin_memory=PIN_MEMORY)

print("Train size:", len(train_dataset))
print("Test size:", len(test_dataset))

# ----------------------------

# 2. ANN


class ANN_STL10(nn.Module):
    """Fully connected baseline: flattened 3x64x64 image -> 1024 -> 512 -> NUM_CLASSES."""

    def __init__(self):
        super().__init__()
        flat_features = 3 * 64 * 64  # 12,288 inputs per image

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(flat_features, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, NUM_CLASSES)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of images to unnormalised class scores (logits)."""
        hidden = self.flatten(x)                 # (N, 3*64*64)
        hidden = self.relu(self.fc1(hidden))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)                  # no activation: logits


# ----------------------------
# 3. CNN



class CNN_STL10(nn.Module):
    """Three-convolution CNN for 3x64x64 inputs that outputs NUM_CLASSES logits.

    Tensor shape after each stage (per sample):
        input   3 x 64 x 64
        conv1  32 x 64 x 64   (5x5 kernel, padding 2, ReLU)
        pool1  32 x 32 x 32   (2x2 max-pool, stride 2)
        conv2  64 x 32 x 32   (3x3 kernel, padding 1, ReLU)
        conv3 128 x 32 x 32   (3x3 kernel, padding 1, ReLU)
        pool2 128 x 16 x 16   (2x2 max-pool, stride 2)
        flat   128 * 16 * 16 = 32,768
        fc1    512            (ReLU)
        fc2    NUM_CLASSES
    """

    def __init__(self):
        super().__init__()

        # Feature extractor (layers are created in the order they are applied).
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2, stride=2)

        # Classifier head on the flattened 128 x 16 x 16 feature map.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, NUM_CLASSES)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the conv stack and classifier head; returns logits."""
        feats = self.pool1(self.relu(self.conv1(x)))
        feats = self.relu(self.conv2(feats))
        feats = self.pool2(self.relu(self.conv3(feats)))

        hidden = self.relu(self.fc1(self.flatten(feats)))
        return self.fc2(hidden)

# ----------------------------
# Training



def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Args:
        model: module to optimise; moved to ``device`` before training.
        train_loader: iterable yielding ``(images, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimiser stepping ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.
        device: device each batch is moved to.

    Returns:
        List with one entry per epoch: the sample-weighted mean batch loss.
    """
    model.to(device)
    history = []

    for epoch_idx in range(num_epochs):
        model.train()
        loss_sum, samples_seen = 0.0, 0

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()

            # Weight by batch size so a short final batch does not skew the mean.
            n_in_batch = targets.size(0)
            loss_sum += batch_loss.item() * n_in_batch
            samples_seen += n_in_batch

        mean_loss = loss_sum / samples_seen
        history.append(mean_loss)
        print(f"Epoch [{epoch_idx+1}/{num_epochs}] - Loss: {mean_loss:.4f}")

    return history

def evaluate_model(model, data_loader, device):
    """Compute top-1 accuracy (in percent) of ``model`` over ``data_loader``.

    Args:
        model: module to evaluate; it is moved to ``device`` and switched to
            eval mode (it is left in eval mode afterwards).
        data_loader: iterable yielding ``(images, labels)`` batches.
        device: device the model and each batch are moved to.

    Returns:
        Accuracy as a float in [0, 100], or NaN when ``data_loader`` yields
        no samples (accuracy is undefined for an empty set).
    """
    # Mirror train_model: do not assume the caller already placed the model
    # on ``device`` -- otherwise inputs and weights can end up on different
    # devices and the forward pass fails.
    model.to(device)
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)

            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Avoid ZeroDivisionError on an empty loader.
        return float("nan")

    return 100.0 * correct / total

# ----------------------------
# 4. Train ANN (5 epochs)

ANN_EPOCHS = 5
ANN_LR = 1e-3

ann = ANN_STL10().to(DEVICE)
criterion_ann = nn.CrossEntropyLoss()
optimizer_ann = optim.Adam(ann.parameters(), lr=ANN_LR)

print("\nTraining ANN...")
# Argument order: model, loader, loss, optimizer, epochs, device.
ann_train_losses = train_model(
    ann, train_loader, criterion_ann, optimizer_ann, ANN_EPOCHS, DEVICE
)

# Held-out accuracy after the final epoch.
ann_test_acc = evaluate_model(ann, test_loader, DEVICE)
print(f"ANN Test Accuracy: {ann_test_acc:.2f}%")


# ----------------------------
# 5. Train CNN (15 epochs)

CNN_EPOCHS = 15
CNN_LR = 1e-4

cnn = CNN_STL10().to(DEVICE)
criterion_cnn = nn.CrossEntropyLoss()
optimizer_cnn = optim.Adam(cnn.parameters(), lr=CNN_LR)

print("\nTraining CNN...")
# Argument order: model, loader, loss, optimizer, epochs, device.
cnn_train_losses = train_model(
    cnn, train_loader, criterion_cnn, optimizer_cnn, CNN_EPOCHS, DEVICE
)

# Held-out accuracy after the final epoch.
cnn_test_acc = evaluate_model(cnn, test_loader, DEVICE)
print(f"CNN Test Accuracy: {cnn_test_acc:.2f}%")



Using device: cpu
Train size: 5000
Test size: 8000

Training ANN...




Epoch [1/5] - Loss: 2.0611
Epoch [2/5] - Loss: 1.6482
Epoch [3/5] - Loss: 1.4764
Epoch [4/5] - Loss: 1.3172
Epoch [5/5] - Loss: 1.2291
ANN Test Accuracy: 38.71%

Training CNN...
Epoch [1/15] - Loss: 1.8460
Epoch [2/15] - Loss: 1.4683
Epoch [3/15] - Loss: 1.3100
Epoch [4/15] - Loss: 1.1782
Epoch [5/15] - Loss: 1.0578
Epoch [6/15] - Loss: 0.9715
Epoch [7/15] - Loss: 0.8965
Epoch [8/15] - Loss: 0.7917
Epoch [9/15] - Loss: 0.7161
Epoch [10/15] - Loss: 0.6337
Epoch [11/15] - Loss: 0.5466
Epoch [12/15] - Loss: 0.4741
Epoch [13/15] - Loss: 0.4103
Epoch [14/15] - Loss: 0.3482
Epoch [15/15] - Loss: 0.2745
CNN Test Accuracy: 51.89%
