In [1]:
import time
import os
import sys
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import pandas as pd
from torch import nn
from torch import optim
from torch.nn import functional as F
import torch.utils.data as data


import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

In [2]:
# CSV files holding the training and test splits (paths are relative to the
# working directory).
train_dir, test_dir = (
    "data/sign_mnist_train.csv",
    "data/sign_mnist_test.csv",
)

In [3]:
import torch
from torch.utils import data

class SignLanguageDataset(data.Dataset):
    """Dataset of 28x28 single-channel images stored one per row in a DataFrame.

    Column 0 of ``df`` is read as the label; the remaining columns are read as
    the 784 pixel values of the image (cast to ``uint8``).

    Args:
        df: DataFrame with the label in the first column and the pixel values
            in the remaining columns.
        transform: Optional callable applied to each sample's ``(28, 28)``
            ``uint8`` array. When omitted, the image is returned as a
            ``(1, 28, 28)`` float tensor holding the raw pixel values.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        # Convert the frame to NumPy once here instead of doing a pandas
        # ``iloc`` row lookup (plus cast and reshape) for every sample that
        # is fetched; the DataLoader calls ``__getitem__`` once per sample.
        self._labels = df.iloc[:, 0].to_numpy()
        self._images = (
            df.iloc[:, 1:].to_numpy().astype('uint8').reshape(len(df), 28, 28)
        )

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``."""
        label = self._labels[index]
        img = self._images[index]

        if self.transform is not None:
            img = self.transform(img)
        else:
            # No transform: add a leading channel axis and convert to float.
            img = torch.from_numpy(img).unsqueeze(0).float()

        return img, label


In [None]:
# Training-time pipeline: random augmentation, conversion to a tensor, then
# normalization with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Lambda(lambda img: img.convert('RGB')),  # Convert grayscale to RGB
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Evaluation-time pipeline: identical preprocessing but WITHOUT the random
# flip/rotation, so that eval/test metrics are deterministic and are measured
# on the unmodified images.
eval_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Lambda(lambda img: img.convert('RGB')),  # Convert grayscale to RGB
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load the data
train_df = pd.read_csv(train_dir)
test_df = pd.read_csv(test_dir)

# Split the data into train and eval: the first `ratio` of the rows are used
# for training, the remainder for evaluation.
# NOTE(review): the split follows file order and is not shuffled — confirm the
# CSV rows are not ordered by label.
ratio = 0.8
split_idx = int(ratio*len(train_df))
train_df, eval_df = train_df[:split_idx], train_df[split_idx:]

# Print all labels
print(sorted(train_df['label'].unique()), len(train_df['label'].unique()))
print(sorted(eval_df['label'].unique()), len(eval_df['label'].unique()))

train_dataset = SignLanguageDataset(train_df, transform=transform)
eval_dataset = SignLanguageDataset(eval_df, transform=eval_transform)
test_dataset = SignLanguageDataset(test_df, transform=eval_transform)

In [None]:
def show_img(img, label):
    """Plot a normalized channel-first image tensor with ``label`` as the title.

    Args:
        img: Image tensor as produced by the dataset's transform, i.e.
            channel-first and normalized with mean 0.5 / std 0.5.
        label: Value shown as the plot title.
    """
    # Undo Normalize(mean=0.5, std=0.5) so the values are back in [0, 1];
    # without this the negative half of the range is clipped by imshow and
    # the picture is rendered too dark.
    img = (img * 0.5 + 0.5).clamp(0, 1)
    # imshow expects channel-last data, hence the permute.
    plt.imshow(img.squeeze().permute(1, 2, 0))
    plt.title(label)
    plt.show()

show_img(*train_dataset[0])

In [None]:
# Start from a pretrained ResNet-18.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=` — confirm against the installed version before switching.
model = models.resnet18(pretrained=True)

# Set to True to freeze the pretrained layers so that only the new output
# layer is trained; with False the whole network is fine-tuned.
frozen = False
if frozen:
    for param in model.parameters():
        param.requires_grad = False

# Replace the final fully-connected layer with a fresh one. It is created
# after the optional freezing above, so it is always trainable. The input
# width is read from the layer being replaced instead of being hard-coded.
num_classes = 26  # presumably one output per letter index — verify against the labels
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Define the loss function and optimizer (only parameters that still require
# gradients are handed to the optimizer)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    [param for param in model.parameters() if param.requires_grad],
    lr=0.001,
)

# Define the data loaders
train_loader = data.DataLoader(train_dataset, batch_size=32, shuffle=True)
eval_loader = data.DataLoader(eval_dataset, batch_size=32, shuffle=False)
test_loader = data.DataLoader(test_dataset, batch_size=32, shuffle=False)

In [35]:
def train(model, train_loader, optimizer, criterion):
    """Run one training pass over ``train_loader`` and update ``model``.

    Args:
        model: Network to train; it is switched to training mode.
        train_loader: Iterable of ``(img, label)`` tensor batches that also
            supports ``len()``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(output, label)``.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch losses and
        the fraction of samples whose arg-max prediction equals the label.
    """
    model.train()
    train_loss = 0
    correct = 0
    total = 0

    # The batches must live on the same device as the model's weights;
    # otherwise the forward pass fails once the model has been moved to the
    # GPU. A model without parameters leaves the batches where they are.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    for img, label in tqdm(train_loader, desc="Training"):
        if device is not None:
            img, label = img.to(device), label.to(device)

        optimizer.zero_grad()
        output = model(img)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(output, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()

    avg_loss = train_loss / len(train_loader)
    accuracy = correct / total
    return avg_loss, accuracy

def evaluate(model, eval_loader, criterion):
    """Measure loss and accuracy of ``model`` on ``eval_loader`` without training.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        eval_loader: Iterable of ``(img, label)`` tensor batches that also
            supports ``len()``.
        criterion: Loss called as ``criterion(output, label)``.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch losses and
        the fraction of samples whose arg-max prediction equals the label.
    """
    model.eval()
    eval_loss = 0
    correct = 0
    total = 0

    # The batches must live on the same device as the model's weights;
    # otherwise the forward pass fails once the model has been moved to the
    # GPU. A model without parameters leaves the batches where they are.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    with torch.no_grad():
        for img, label in tqdm(eval_loader, desc="Evaluating"):
            if device is not None:
                img, label = img.to(device), label.to(device)

            output = model(img)
            loss = criterion(output, label)
            eval_loss += loss.item()
            _, predicted = torch.max(output, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()

    avg_loss = eval_loss / len(eval_loader)
    accuracy = correct / total
    return avg_loss, accuracy

In [None]:
# Training loop: per-epoch metrics are collected for plotting afterwards
n_epochs = 10
train_losses, eval_losses = [], []
train_acc, eval_acc = [], []

# Use the GPU when one is available, otherwise stay on the CPU
model.to('cuda' if torch.cuda.is_available() else 'cpu')

for epoch in range(n_epochs):
    # One pass over the training data, then a pass over the held-out data
    train_loss, train_accuracy = train(model, train_loader, optimizer, criterion)
    eval_loss, eval_accuracy = evaluate(model, eval_loader, criterion)

    train_losses.append(train_loss)
    eval_losses.append(eval_loss)
    train_acc.append(train_accuracy)
    eval_acc.append(eval_accuracy)

    print(f"Epoch {epoch+1}/{n_epochs}")
    print(f"Train Loss: {train_loss:.3f} | Train Accuracy: {train_accuracy:.3f}")
    print(f"Eval Loss: {eval_loss:.3f} | Eval Accuracy: {eval_accuracy:.3f}")

In [None]:
# One figure with two side-by-side panels: loss on the left, accuracy on the right
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: training vs evaluation loss per epoch
loss_ax.plot(train_losses, label='Train Loss')
loss_ax.plot(eval_losses, label='Eval Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training vs Evaluation Loss')
loss_ax.legend()

# Right panel: training vs evaluation accuracy per epoch
acc_ax.plot(train_acc, label='Train Accuracy')
acc_ax.plot(eval_acc, label='Eval Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training vs Evaluation Accuracy')
acc_ax.legend()

# Tidy the spacing and render the figure
fig.tight_layout()
plt.show()