<a href="https://colab.research.google.com/github/hendrikyong/CVNL_Assignment_1/blob/main/CVNL_P02_GP01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Imports
import os
import torch
import kagglehub
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Download dataset from KaggleHub; `path` is the local directory of the download
path = kagglehub.dataset_download("grassknoted/asl-alphabet")

# Build the training directory from the returned download location instead of a
# hard-coded cache path, so the notebook keeps working when the cache location
# or the dataset version changes.
data_dir = os.path.join(path, "asl_alphabet_train", "asl_alphabet_train")
# The bundled asl_alphabet_test folder is not used: it has only 1 image per
# class, which is not enough for evaluation. A validation split is carved out
# of the training folder instead.

# Check dataset files
print("Path to dataset:", data_dir)

# Normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Define image transformations
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomCrop(128, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

# Load the image folder twice (folders act as labels): one view applies the
# training augmentations, the other only the deterministic validation
# preprocessing. Both views scan the same directory, so a sample index refers
# to the same image in each of them.
full_dataset = datasets.ImageFolder(root=data_dir, transform=train_transform)
eval_dataset = datasets.ImageFolder(root=data_dir, transform=val_transform)

# Define train-validation split
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_split = random_split(full_dataset, [train_size, val_size])

# Re-point the validation indices at the un-augmented view. Assigning
# `.dataset.transform` on the two subsets does not work: both subsets wrap the
# same underlying dataset object, so the second assignment would overwrite the
# first and training would silently run without augmentation.
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

# Print dataset summary
print(f"Total images: {len(full_dataset)}")
print(f"Training images: {len(train_dataset)}")
print(f"Validation images: {len(val_dataset)}")

In [None]:
# CNN model definition
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
'''
typical architecture of a CNN
1. input
2. conv
3. relu
4. pooling
5. fully connected layers
6. output pred

considerations:
how many conv layers do i need for feature extraction?
how many hidden layers?
how many channels?
'''

class CNN(nn.Module):
    """Three-stage convolutional classifier for 3-channel images.

    The feature extractor (``self.model``) stacks three Conv -> ReLU -> MaxPool
    stages (64, 128 and 256 channels). Every max-pool halves the spatial size,
    so the feature map entering the classifier head (``self.fc``) is
    ``input_size // 8`` on each side.

    Args:
        num_classes: Number of output logits produced by the final layer.
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 128, which
            reproduces the original fixed ``256 * 16 * 16`` classifier input.

    Raises:
        ValueError: If either spatial dimension is smaller than 8, because the
            three pooling stages would leave an empty feature map.
    """

    def __init__(self, num_classes=29, input_size=128):
        super().__init__()

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # Three 2x2 max-pools with stride 2 each floor-halve the spatial size,
        # which is equivalent to a single floor division by 8.
        feat_h, feat_w = height // 8, width // 8
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size must be at least 8 in each dimension, got {input_size!r}"
            )

        self.model = nn.Sequential(
            # conv1
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv2
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # conv3
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fc = nn.Sequential(
            nn.Flatten(),
            # Flattened size is derived from input_size rather than hard-coded
            nn.Linear(256 * feat_h * feat_w, 512),
            nn.ReLU(),
            nn.Dropout(0.6),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.model(x)
        x = self.fc(x)
        return x

In [None]:
from tqdm import tqdm  # Make sure to import tqdm

def train(model, train_loader, loss_func, optimizer, device, epochs):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    For every batch the inputs and targets are moved to ``device``, the loss is
    computed with ``loss_func`` and one optimizer step is taken. A tqdm bar
    shows the running average loss and accuracy, and a summary line is printed
    after each epoch.

    Args:
        model: Network to optimize; it is switched to training mode.
        train_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        loss_func: Callable computing the loss from ``(outputs, targets)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.
        epochs: Number of passes over ``train_loader``.

    Returns:
        A list with one ``(average_loss, accuracy_percent)`` tuple per epoch.
        The average loss is the mean of the per-batch losses.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        # Without this check the epoch summary would fail with a bare
        # ZeroDivisionError that does not point at the real problem.
        raise ValueError("train_loader is empty; there is nothing to train on")

    model.train()  # Set the model to training mode
    history = []

    for epoch in range(epochs):
        epoch_loss = 0.0
        correct = 0
        total = 0

        # Initialize tqdm progress bar for the current epoch
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=True)

        for batch_idx, (inputs, targets) in enumerate(progress_bar):
            inputs, targets = inputs.to(device), targets.to(device)

            # Zero the gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = loss_func(outputs, targets)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Update epoch loss and accuracy; the prediction is the index of
            # the largest output along dim 1
            epoch_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            # Update the progress bar with current metrics
            progress_bar.set_postfix({
                "Loss": f"{epoch_loss / (batch_idx + 1):.4f}",  # Average loss so far
                "Acc": f"{100. * correct / total:.2f}%"        # Current accuracy
            })

        # Print epoch summary and record it for the caller
        avg_loss = epoch_loss / num_batches
        accuracy = 100. * correct / total
        print(f"Epoch {epoch+1}: Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
        history.append((avg_loss, accuracy))

    return history

In [None]:
def test(model, test_loader, loss_func, device):
    model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    correct = 0
    total = 0

    # Disable gradient computation for testing
    with torch.no_grad():
        # Initialize tqdm progress bar for the test loop
        progress_bar = tqdm(test_loader, desc="Testing", leave=False)

        for batch_idx, (inputs, targets) in enumerate(progress_bar):
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = loss_func(outputs, targets)

            # Update test loss and accuracy
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            # Update the progress bar with current metrics
            progress_bar.set_postfix({
                "Loss": f"{test_loss / (batch_idx + 1):.4f}",  # Average loss so far
                "Acc": f"{100. * correct / total:.2f}%"        # Current accuracy
            })

    # Print final test metrics
    avg_test_loss = test_loss / len(test_loader)
    test_accuracy = 100. * correct / total
    print(f"Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

    return avg_test_loss, test_accuracy

In [None]:
# Define model, loss function, optimizer, etc.
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

model = CNN(num_classes=29).to(device)
loss_func = nn.CrossEntropyLoss()
epochs = 10
lr = 1e-4
optimizer = optim.Adam(model.parameters(), lr=lr)

# Call the train function; pass the `epochs` variable so that changing it above
# actually changes the number of training epochs
train(model, train_loader, loss_func, optimizer, device, epochs=epochs)
# The validation split serves as the evaluation set
test_loss, test_accuracy = test(model, val_loader, loss_func, device)