In [None]:
# === Load libraries ===
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
from tqdm import tqdm
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
import os
from PIL import Image
import pandas as pd

# Fix the seed of PyTorch's default random number generator for reproducibility.
torch.manual_seed(42)

In [39]:
# === Dataloaders ===
class ImageDataset(Dataset):
    """Image-classification dataset backed by a folder of PNGs and a label CSV.

    The CSV is read with ``pd.read_csv``. For every row, column 0 is used as
    the image file stem (``<image_folder>/<stem>.png``) and column 1 as the
    label.

    Args:
        image_folder: Directory that contains the ``.png`` files.
        label_csv: Path to the CSV file described above.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, image_folder, label_csv, transform=None):
        self.image_folder = image_folder
        self.labels_df = pd.read_csv(label_csv)
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels_df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is always converted to 3-channel RGB, so grayscale, palette
        and RGBA PNGs yield the same channel layout as plain RGB files.
        """
        stem = self.labels_df.iloc[idx, 0]
        label = self.labels_df.iloc[idx, 1]
        img_name = os.path.join(self.image_folder, f'{stem}.png')

        # ``convert`` returns a fully loaded copy, so the underlying file can
        # be closed as soon as the ``with`` block ends.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

# Image transformations: resize to the 128x128 input the CNN's first linear
# layer is sized for, convert to a tensor, then normalize each of the three
# channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


# Load the dataset
image_folder = 'data/train'
label_csv = 'data/train/labels_train.csv'
dataset = ImageDataset(image_folder=image_folder, label_csv=label_csv, transform=transform)

# Split dataset into training and validation sets (80-20 split).
# A dedicated, explicitly seeded generator makes the partition identical every
# time this cell is executed, independent of how much of the global RNG stream
# has been consumed by earlier cells or earlier runs of this cell.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# Create DataLoader for training and validation sets
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)  # fixed order for validation

# Initialize device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [41]:
# === CNN architecture ===
class CNNClassifier(nn.Module):
    """Four-block convolutional classifier for 3-channel 128x128 images.

    Each block is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2, 2)``, with 32, 64, 128 and 256 output channels. The four
    poolings halve the spatial size four times (128 -> 8), so the first
    linear layer takes ``256 * 8 * 8`` features.

    Args:
        num_classes: Number of output logits (default 26).
    """

    def __init__(self, num_classes=26):
        super().__init__()

        # Convolutional Layer Block 1
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Convolutional Layer Block 2
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Convolutional Layer Block 3
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Convolutional Layer Block 4
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool4 = nn.MaxPool2d(2, 2)

        # Fully connected head; 8x8 is the spatial size left after four
        # 2x2 poolings of a 128x128 input.
        self.fc1 = nn.Linear(256 * 8 * 8, 512)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Image batch of shape ``(batch, 3, 128, 128)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not reduce to
                8x8, because ``fc1`` then receives the wrong feature count.
        """
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))
        x = self.pool4(F.relu(self.bn4(self.conv4(x))))

        # Flatten everything except the batch axis. Unlike
        # ``view(-1, 256 * 8 * 8)`` this can never fold several samples into
        # one row when the input resolution is unexpected; a wrong size
        # surfaces as a shape error in ``fc1`` instead.
        x = torch.flatten(x, 1)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x

In [42]:
import torch
from torch.utils.data import Subset
from torch.utils.data import DataLoader

# Work on a leading slice of each split (2500 training / 800 validation
# samples) instead of the full data.
subset_train_size = 2500  # Number of training samples to use
subset_val_size = 800  # Number of validation samples to use

# Clamp the requested sizes to what each split actually contains; otherwise
# an out-of-range index would only fail later, while a DataLoader iterates.
train_indices = list(range(min(subset_train_size, len(train_dataset))))
val_indices = list(range(min(subset_val_size, len(val_dataset))))

# Create subsets for training and validation
train_subset = Subset(train_dataset, train_indices)
val_subset = Subset(val_dataset, val_indices)

# Create DataLoader for the subsets (these rebind train_loader / val_loader)
train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

# Initialize model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNClassifier().to(device)
criterion = nn.CrossEntropyLoss()
# NOTE(review): the run recorded in this notebook plateaus at a loss of about
# 3.26 (ln(26) is about 3.258, the loss of a uniform guess over 26 classes)
# with ~3.5% validation accuracy. This learning rate may be too high - TODO confirm.
optimizer = optim.Adam(model.parameters(), lr=0.002)

# Training loop
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=20):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Args:
        model: Network to optimise; it is switched between ``train()`` and
            ``eval()`` mode by this function.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        ``(train_losses, val_losses, val_accuracies)``: three lists with one
        entry per epoch. Losses are the mean of the per-batch loss values and
        accuracy is the fraction of correctly classified validation samples.
        An entry is NaN when the corresponding loader produced no batches.
    """
    # Move batches to wherever the model lives instead of depending on a
    # notebook-level ``device`` variable being defined and in sync.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    train_losses = []
    val_losses = []
    val_accuracies = []

    for epoch in range(epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        train_batches = 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, labels = images.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_batches += 1

        avg_train_loss = running_loss / train_batches if train_batches else float("nan")
        train_losses.append(avg_train_loss)

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_batches = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(run_device), labels.to(run_device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()
                val_batches += 1

                # Index of the largest logit is the predicted class.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # An empty validation loader yields NaN instead of a ZeroDivisionError.
        avg_val_loss = val_loss / val_batches if val_batches else float("nan")
        val_accuracy = correct / total if total else float("nan")

        val_losses.append(avg_val_loss)
        val_accuracies.append(val_accuracy)

        print(f"Epoch [{epoch+1}/{epochs}] "
              f"Train Loss: {avg_train_loss:.4f} "
              f"Val Loss: {avg_val_loss:.4f} "
              f"Val Accuracy: {val_accuracy*100:.2f}%")

    return train_losses, val_losses, val_accuracies

# Run the training loop on the subset loaders for 20 epochs.
train_losses, val_losses, val_accuracies = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=20,
)


Epoch 1/20: 100%|██████████| 79/79 [03:00<00:00,  2.28s/it]


Epoch [1/20] Train Loss: 5.9302 Val Loss: 3.2601 Val Accuracy: 3.25%


Epoch 2/20: 100%|██████████| 79/79 [02:53<00:00,  2.20s/it]


Epoch [2/20] Train Loss: 3.2617 Val Loss: 3.2605 Val Accuracy: 3.50%


Epoch 3/20: 100%|██████████| 79/79 [02:46<00:00,  2.11s/it]


Epoch [3/20] Train Loss: 3.2577 Val Loss: 3.2611 Val Accuracy: 3.50%


Epoch 4/20: 100%|██████████| 79/79 [02:42<00:00,  2.06s/it]


Epoch [4/20] Train Loss: 3.2575 Val Loss: 3.2617 Val Accuracy: 3.50%


Epoch 5/20: 100%|██████████| 79/79 [03:42<00:00,  2.81s/it]


Epoch [5/20] Train Loss: 3.2571 Val Loss: 3.2623 Val Accuracy: 3.50%


Epoch 6/20: 100%|██████████| 79/79 [03:05<00:00,  2.35s/it]


Epoch [6/20] Train Loss: 3.2566 Val Loss: 3.2630 Val Accuracy: 3.50%


Epoch 7/20: 100%|██████████| 79/79 [02:56<00:00,  2.23s/it]


Epoch [7/20] Train Loss: 3.2570 Val Loss: 3.2636 Val Accuracy: 3.50%


Epoch 8/20: 100%|██████████| 79/79 [02:52<00:00,  2.18s/it]


Epoch [8/20] Train Loss: 3.2567 Val Loss: 3.2642 Val Accuracy: 3.50%


Epoch 9/20: 100%|██████████| 79/79 [02:45<00:00,  2.10s/it]


Epoch [9/20] Train Loss: 3.2564 Val Loss: 3.2645 Val Accuracy: 3.50%


Epoch 10/20:  77%|███████▋  | 61/79 [02:37<00:46,  2.59s/it]


KeyboardInterrupt: 

In [43]:
# Sanity check: repeatedly fit one fixed batch and watch the loss fall.

# Take at most one batch from the loader; zip stops before a second is drawn.
small_batch = [batch for _, batch in zip(range(1), train_loader)]

images, labels = small_batch[0]
images = images.to(device)
labels = labels.to(device)

# Fresh model / loss / optimizer for this check
model = CNNClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Nothing below leaves training mode, so it is enough to enable it once.
model.train()

# Up to 100 optimisation steps on the same batch; stops early once fitted
for epoch in range(100):
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Predicted class = position of the largest logit in each row
    predicted = outputs.max(dim=1).indices
    num_correct = (predicted == labels).sum().item()
    acc = num_correct / labels.size(0)

    loss_value = loss.item()
    print(f"Epoch {epoch+1}: Loss = {loss_value:.4f}, Accuracy = {acc*100:.2f}%")

    if loss_value < 0.01:
        print("Model successfully overfit the small batch.")
        break



Epoch 1: Loss = 3.1673, Accuracy = 9.38%
Epoch 2: Loss = 19.1405, Accuracy = 15.62%
Epoch 3: Loss = 20.4328, Accuracy = 15.62%
Epoch 4: Loss = 17.9636, Accuracy = 15.62%
Epoch 5: Loss = 17.8767, Accuracy = 6.25%
Epoch 6: Loss = 15.6003, Accuracy = 15.62%
Epoch 7: Loss = 11.5479, Accuracy = 15.62%
Epoch 8: Loss = 9.9866, Accuracy = 21.88%
Epoch 9: Loss = 6.8675, Accuracy = 21.88%
Epoch 10: Loss = 5.4709, Accuracy = 31.25%
Epoch 11: Loss = 4.7171, Accuracy = 40.62%
Epoch 12: Loss = 3.7913, Accuracy = 21.88%
Epoch 13: Loss = 1.5055, Accuracy = 50.00%
Epoch 14: Loss = 1.1423, Accuracy = 68.75%
Epoch 15: Loss = 0.9413, Accuracy = 68.75%
Epoch 16: Loss = 0.9015, Accuracy = 68.75%
Epoch 17: Loss = 0.8724, Accuracy = 75.00%
Epoch 18: Loss = 0.8468, Accuracy = 71.88%
Epoch 19: Loss = 0.3145, Accuracy = 87.50%
Epoch 20: Loss = 0.3242, Accuracy = 93.75%
Epoch 21: Loss = 0.1574, Accuracy = 96.88%
Epoch 22: Loss = 0.2539, Accuracy = 96.88%
Epoch 23: Loss = 0.1223, Accuracy = 96.88%
Epoch 24: Loss =

In [None]:
# === Training loop final ===
# Training loop
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Args:
        model: Network to optimise; it is switched between ``train()`` and
            ``eval()`` mode by this function.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        ``(train_losses, val_losses, val_accuracies)``: three lists with one
        entry per epoch. Losses are the mean of the per-batch loss values and
        accuracy is the fraction of correctly classified validation samples.
        An entry is NaN when the corresponding loader produced no batches.
    """
    # Move batches to wherever the model lives instead of depending on a
    # notebook-level ``device`` variable being defined and in sync.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    train_losses = []
    val_losses = []
    val_accuracies = []

    for epoch in range(epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        train_batches = 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, labels = images.to(run_device), labels.to(run_device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_batches += 1

        avg_train_loss = running_loss / train_batches if train_batches else float("nan")
        train_losses.append(avg_train_loss)

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_batches = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(run_device), labels.to(run_device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()
                val_batches += 1

                # Index of the largest logit is the predicted class.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # An empty validation loader yields NaN instead of a ZeroDivisionError.
        avg_val_loss = val_loss / val_batches if val_batches else float("nan")
        val_accuracy = correct / total if total else float("nan")

        val_losses.append(avg_val_loss)
        val_accuracies.append(val_accuracy)

        print(f"Epoch [{epoch+1}/{epochs}] "
              f"Train Loss: {avg_train_loss:.4f} "
              f"Val Loss: {avg_val_loss:.4f} "
              f"Val Accuracy: {val_accuracy*100:.2f}%")

    return train_losses, val_losses, val_accuracies

# Start the final run from freshly initialised weights: the `model` and
# `optimizer` left in scope by the single-batch sanity check have already been
# fitted to that one batch, so reusing them would not be a clean training run.
model = CNNClassifier().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_losses, val_losses, val_accuracies = train_model(
    model, train_loader, val_loader, criterion, optimizer, epochs=10
)