In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import os
import glob
import random
import time
import numpy as np
from numpy.linalg import norm
import pandas as pd 
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns

# Input data files are available in the read-only "../input/" directory

# Root directory handed to ASLDataset; images are searched for recursively below it
# and each image's label is taken from the name of its parent folder.
dataset_path = "/kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train"


In [None]:
def apply_clahe(image, clip_limit=3.0, tile_grid_size=(10, 10)):
    """Apply CLAHE (Contrast Limited Adaptive Histogram Equalization).

    Args:
      image: Image array as loaded by OpenCV. A 3-channel image is treated as
        BGR and converted to grayscale first; a 2-D array is assumed to be
        grayscale already and is equalized as-is.
      clip_limit: Contrast-limiting threshold passed to ``cv2.createCLAHE``.
      tile_grid_size: Grid size ``(cols, rows)`` passed to ``cv2.createCLAHE``.

    Returns:
      The single-channel, contrast-equalized image.
    """

    if image.ndim == 2:
        # Already single-channel: cvtColor(BGR2GRAY) would raise on this input
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale

    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tuple(tile_grid_size))

    return clahe.apply(gray)

In [None]:
class ASLDataset(Dataset):
    """ASL image dataset yielding CLAHE-equalized, normalized grayscale tensors.

    Images are discovered recursively under ``root_dir``. Each image's label is
    the name of its parent folder, mapped to an integer by sorted folder name.
    """

    def __init__(self, root_dir, transform=None, image_size=(64, 64)):
        """
        Args:
          root_dir: Image directory (searched recursively for ``*.jpg`` files).
          transform: Optional callable applied to the image tensor.
          image_size: Target size passed to ``cv2.resize`` (``(width, height)``).
        """

        self.root_dir = root_dir
        self.transform = transform
        self.image_size = tuple(image_size)

        # glob returns files in filesystem order; sort so that a given index
        # always refers to the same image from one run to the next.
        self.image_paths = sorted(
            glob.glob(os.path.join(root_dir, '**/*.jpg'), recursive=True)
        )

        # Label == name of the folder that contains the image
        folder_names = [os.path.basename(os.path.dirname(path)) for path in self.image_paths]

        # Convert between labels and integers
        self.label_to_index = {label: idx for idx, label in enumerate(sorted(set(folder_names)))}
        self.index_to_label = {idx: label for label, idx in self.label_to_index.items()}
        self.labels = [self.label_to_index[name] for name in folder_names]

    def __len__(self):
        """Return the number of images found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """
        Args:
          idx: Index of the item to get

        Returns:
          Tuple ``(image, label)`` where ``image`` is a float32 tensor with a
          leading channel axis of size 1 and values in [-1, 1], and ``label``
          is the integer class index.

        Raises:
          OSError: If the image file cannot be read or decoded.
        """

        path = self.image_paths[idx]
        label = self.labels[idx]

        image = cv2.imread(path)  # Load with OpenCV (BGR channel order)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising
            raise OSError(f"Could not read image file: {path}")

        # apply_clahe converts with COLOR_BGR2GRAY, so the image must stay in
        # BGR order here; converting to RGB first would swap the red and blue
        # luminance weights.
        image = apply_clahe(image)

        # Resize image
        image = cv2.resize(image, self.image_size)

        # Normalize to [-1, 1]
        image = image.astype(np.float32) / 255.0
        image = (image - 0.5) / 0.5

        # Grayscale: add the channel axis in front -> (1, H, W)
        image = torch.from_numpy(image).unsqueeze(0)

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# Load dataset
dataset = ASLDataset(root_dir=dataset_path, transform=None)

# 80/20 train-test split
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Seed the split so the same images land in train/test on every run;
# otherwise reported test metrics are not comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

# Create DataLoaders (only the training loader is shuffled)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# Display helper for normalized image tensors
def imshow(img):
    """Undo the [-1, 1] normalization and show the tensor with matplotlib.

    Args:
      img: Image tensor whose first axis is moved to the end before plotting.
    """
    restored = img / 2 + 0.5     # Map values back from [-1, 1] to [0, 1]
    channels_last = restored.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

# Pull a single batch from the training loader as a visual sanity check
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Tile the batch into one grid image and display it
sample_grid = torchvision.utils.make_grid(images)
imshow(sample_grid)


In [None]:
# Size of the classifier's output layer; must be at least the number of label folders found by ASLDataset
num_classes = 29   # A - Z, space, nothing, del

class SimpleCNN(nn.Module):
    """Two-stage convolutional classifier for single-channel images.

    Each stage is conv(3x3, padding 1) -> ReLU -> 2x2 max-pool, followed by
    two fully-connected layers. The first fully-connected layer expects
    32 * 16 * 16 input features, i.e. 64x64 inputs after two 2x poolings.
    """

    def __init__(self, num_classes):
        """
        Args:
          num_classes: Number of output logits produced by the final layer.
        """
        super().__init__()

        # Change in_channels of conv1 (first argument) when feeding color images
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)   # Halves height and width
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=32 * 16 * 16, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = self.pool(torch.relu(conv(features)))

        # Flatten everything except the batch axis for the dense layers
        flat = features.view(features.shape[0], -1)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

# Instantiate the network; its final layer emits num_classes logits
model = SimpleCNN(num_classes=num_classes)


In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Basic training loop
num_epochs = 5
for epoch in range(num_epochs):

    # Make sure the model is in training mode even if an evaluation cell
    # (which calls model.eval()) was executed before re-running this one.
    model.train()

    running_loss = 0.0
    samples_seen = 0

    for images, labels in train_loader:

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight by batch size so the epoch
        # figure is a true per-sample average rather than the last batch's loss.
        n_batch = labels.size(0)
        running_loss += loss.item() * n_batch
        samples_seen += n_batch

    epoch_loss = running_loss / max(samples_seen, 1)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

In [None]:
# Single example prediction
sample_index = 200   # Position of the example within the full dataset

# Switch to inference behaviour before predicting; this cell runs before the
# evaluation cell, so the model would otherwise still be in training mode.
model.eval()

with torch.no_grad():

    sample_image, true_label = dataset[sample_index]   # Sample image and its true label
    sample_image = sample_image.unsqueeze(0)           # Add batch dimension

    # Predict label: index of the largest logit
    prediction = model(sample_image)
    predicted_label = torch.argmax(prediction).item()

# Get the true label from the dataset's index_to_label mapping
true_label_str = dataset.index_to_label[true_label]

# Print the results
print(f"True character: {true_label_str}")
print(f"Predicted character: {dataset.index_to_label[predicted_label]}")

In [None]:
# Set model to evaluation mode
model.eval()

all_true_labels = []
all_predicted_labels = []

total_loss = 0.0
correct_preds = 0
total_samples = 0

# Disable gradients
with torch.no_grad():

    for images, labels in test_loader:

        batch_count = labels.size(0)

        # Forward pass
        outputs = model(images)

        # criterion returns the mean over the batch; scale by the batch size so
        # a smaller final batch does not get the same weight as a full one.
        loss = criterion(outputs, labels)
        total_loss += loss.item() * batch_count

        # Get predictions (index of the largest logit per sample)
        _, predicted = torch.max(outputs, 1)

        # Store true and predicted labels
        all_true_labels.extend(labels.cpu().numpy())  # move to CPU and convert to numpy
        all_predicted_labels.extend(predicted.cpu().numpy())

        # Track num correct predictions
        correct_preds += (predicted == labels).sum().item()
        total_samples += batch_count

if total_samples == 0:
    raise RuntimeError("test_loader produced no samples; cannot compute metrics")

# Compute per-sample avg loss and accuracy
avg_loss = total_loss / total_samples
accuracy = 100 * correct_preds / total_samples

print(f"Test Loss: {avg_loss:.4f}")
print(f"Test Accuracy: {accuracy:.2f}%")


In [None]:
# Generate and plot confusion matrix
# Pass the full list of class indices explicitly: without `labels=`, a class
# that never appears in the test labels or predictions is dropped from the
# matrix, which would shift every tick label after it.
class_indices = sorted(dataset.index_to_label)
class_names = [dataset.index_to_label[i] for i in class_indices]
cm = confusion_matrix(all_true_labels, all_predicted_labels, labels=class_indices)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()