In [1]:
import torch

# Quick environment probe: report whether PyTorch can see a CUDA device and,
# if so, the name of device 0.
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    print("GPU Name:", "N/A")


CUDA available: False
GPU Name: N/A


In [17]:
!pip install torch torchvision matplotlib tqdm scikit-learn --quiet


In [18]:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets, models
from torchvision.models.vision_transformer import vit_b_16
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Path to the nested train dataset folder. Set the ASL_DATASET_PATH environment
# variable to run on another machine; the fallback is the original local path.
dataset_path = os.environ.get(
    "ASL_DATASET_PATH",
    r"C:\Users\VR\Desktop\Amna-Asad\asl_alphabet_dataset\asl_alphabet_train\asl_alphabet_train",
)

# Create directories to save models and graphs
models_dir = "models"
graphs_dir = "graphs"
os.makedirs(models_dir, exist_ok=True)
os.makedirs(graphs_dir, exist_ok=True)

# Seed PyTorch's global RNG so shuffling, augmentation and the initialisation
# of the replacement classifier heads are repeatable after a kernel restart.
SEED = 42
torch.manual_seed(SEED)

# Device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [19]:
batch_size = 32
image_size = 224
split_seed = 42  # fixes the train/val partition so a later session evaluates on the same held-out images

# Channel mean/std used to normalise inputs for the ImageNet-pretrained backbones.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training images get light augmentation (flip + small rotation).
train_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

# Validation images are only resized and normalised.
val_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std),
])

# Two ImageFolder views over the same directory, one per transform.
# The subsets produced by a split share a single underlying dataset object, so
# assigning `val_dataset.dataset.transform = val_transform` would also replace
# the transform used by the training subset and silently disable augmentation.
full_dataset = datasets.ImageFolder(dataset_path, transform=train_transform)
val_view = datasets.ImageFolder(dataset_path, transform=val_transform)
num_classes = len(full_dataset.classes)

val_size = int(0.2 * len(full_dataset))
train_size = len(full_dataset) - val_size

# One seeded permutation indexes both views, so the two subsets are disjoint
# and together cover every image exactly once.
split_generator = torch.Generator().manual_seed(split_seed)
shuffled_indices = torch.randperm(len(full_dataset), generator=split_generator).tolist()
train_dataset = torch.utils.data.Subset(full_dataset, shuffled_indices[:train_size])
val_dataset = torch.utils.data.Subset(val_view, shuffled_indices[train_size:])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

print(f"Classes: {full_dataset.classes}")
print(f"Train samples: {train_size}, Validation samples: {val_size}")


Classes: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
Train samples: 69600, Validation samples: 17400


In [None]:
def plot_metrics(train_acc, val_acc, train_loss, val_loss, model_name):
    """Write the accuracy and loss curves of one training run to ``graphs_dir``.

    Args:
        train_acc: per-epoch training accuracy values.
        val_acc: per-epoch validation accuracy values.
        train_loss: per-epoch training loss values.
        val_loss: per-epoch validation loss values.
        model_name: used in the plot titles and as the file-name prefix.

    Two PNG files are saved, ``<model_name>_accuracy.png`` and
    ``<model_name>_loss.png``; each figure is closed after saving.
    """
    # Accuracy curves
    acc_fig, acc_ax = plt.subplots()
    acc_ax.plot(train_acc, label="Train Accuracy")
    acc_ax.plot(val_acc, label="Validation Accuracy")
    acc_ax.set_title(f"{model_name} Accuracy")
    acc_ax.set_xlabel("Epoch")
    acc_ax.set_ylabel("Accuracy (%)")
    acc_ax.legend()
    acc_fig.savefig(os.path.join(graphs_dir, f"{model_name}_accuracy.png"))
    plt.close(acc_fig)

    # Loss curves
    loss_fig, loss_ax = plt.subplots()
    loss_ax.plot(train_loss, label="Train Loss")
    loss_ax.plot(val_loss, label="Validation Loss")
    loss_ax.set_title(f"{model_name} Loss")
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss")
    loss_ax.legend()
    loss_fig.savefig(os.path.join(graphs_dir, f"{model_name}_loss.png"))
    plt.close(loss_fig)
    

In [21]:
def train_model(model, model_name, epochs_freeze=5, epochs_finetune=25):
    """Fine-tune a pretrained classifier in two phases, saving checkpoints and curves.

    Phase 1 (``epochs_freeze`` epochs) trains only the classification head with
    Adam at lr=0.001. Phase 2 (``epochs_finetune`` epochs) unfreezes every
    parameter and continues with a fresh Adam optimizer at lr=0.0001. Both
    phases use ReduceLROnPlateau on the validation loss and share one
    early-stopping rule (5 epochs without a new best validation loss).

    NOTE: a later cell defines another ``train_model`` with a different
    signature; whichever of the two cells ran last is the one in effect.

    Args:
        model: network whose head is reachable as ``heads.head`` (for "ViT"),
            ``fc`` (for "GoogLeNet") or ``classifier`` (any other name).
        model_name: selects which head to unfreeze; also used in progress
            messages and output file names.
        epochs_freeze: number of head-only epochs.
        epochs_finetune: number of epochs with all parameters trainable.

    Returns:
        dict with keys "train_acc", "val_acc", "train_loss", "val_loss", each
        a list holding one value per completed epoch.

    Side effects: writes ``<model_name>_best.pt`` whenever the validation loss
    improves and ``<model_name>_last.pt`` at the end (both in ``models_dir``),
    then calls ``plot_metrics``. Reads the notebook globals ``device``,
    ``train_loader`` and ``val_loader``.
    """
    model.to(device)

    # Phase 1 setup: freeze everything, then re-enable only the classification head.
    for param in model.parameters():
        param.requires_grad = False

    if model_name == "ViT":
        head = model.heads.head
    elif model_name == "GoogLeNet":
        head = model.fc
    else:  # AlexNet and VGG16
        head = model.classifier
    for param in head.parameters():
        param.requires_grad = True

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
    # `verbose=True` is deprecated since PyTorch 2.2 (later releases reject it);
    # learning-rate reductions are reported explicitly after scheduler.step() below.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

    patience = 5
    best_val_loss = float('inf')
    no_improve_epochs = 0

    train_acc, val_acc = [], []
    train_loss, val_loss = [], []

    total_epochs = epochs_freeze + epochs_finetune

    for epoch in range(total_epochs):
        if epoch == epochs_freeze:
            # Unfreeze all layers for fine-tuning
            for param in model.parameters():
                param.requires_grad = True
            optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)  # lower LR for finetuning
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)
            # Stagnation seen while only the head was trainable must not count
            # against the fine-tuning phase; otherwise one non-improving epoch
            # right after unfreezing could trigger early stopping.
            no_improve_epochs = 0
            print(f"Unfroze all layers and lowered LR to 0.0001 at epoch {epoch+1}")

        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in tqdm(train_loader, desc=f"{model_name} Training Epoch {epoch+1}/{total_epochs}"):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, preds = outputs.max(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_loss_epoch = running_loss / len(train_loader)
        train_acc_epoch = 100 * correct / total
        train_loss.append(train_loss_epoch)
        train_acc.append(train_acc_epoch)

        # Validation step
        model.eval()
        val_running_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_running_loss += loss.item()
                _, preds = outputs.max(1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_loss_epoch = val_running_loss / len(val_loader)
        val_acc_epoch = 100 * val_correct / val_total
        val_loss.append(val_loss_epoch)
        val_acc.append(val_acc_epoch)

        print(f"Epoch {epoch+1}: Train Acc={train_acc_epoch:.2f}%, Val Acc={val_acc_epoch:.2f}%, Val Loss={val_loss_epoch:.4f}")

        # Step the plateau scheduler and report any reduction it applied.
        lr_before = optimizer.param_groups[0]["lr"]
        scheduler.step(val_loss_epoch)
        lr_after = optimizer.param_groups[0]["lr"]
        if lr_after < lr_before:
            print(f"Learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")

        # Early stopping
        if val_loss_epoch < best_val_loss:
            best_val_loss = val_loss_epoch
            no_improve_epochs = 0
            torch.save(model.state_dict(), os.path.join(models_dir, f"{model_name}_best.pt"))
        else:
            no_improve_epochs += 1
            if no_improve_epochs >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    torch.save(model.state_dict(), os.path.join(models_dir, f"{model_name}_last.pt"))
    plot_metrics(train_acc, val_acc, train_loss, val_loss, model_name)

    return {
        "train_acc": train_acc,
        "val_acc": val_acc,
        "train_loss": train_loss,
        "val_loss": val_loss,
    }


In [28]:
def train_model(model, model_name, num_epochs=30, lr=0.001, early_stop_patience=5):
    """Train ``model`` in a single phase, saving checkpoints and metric curves.

    This redefines the earlier two-phase ``train_model``: no parameters are
    frozen here, so every parameter with ``requires_grad=True`` is optimised
    from the first epoch.

    Args:
        model: network to train; moved to the notebook's ``device``.
        model_name: used in progress messages and output file names.
        num_epochs: maximum number of epochs.
        lr: initial Adam learning rate. ReduceLROnPlateau (patience 3) lowers
            it when the validation loss stops improving.
        early_stop_patience: stop after this many consecutive epochs without a
            new best validation loss.

    Returns:
        dict with keys "train_acc", "val_acc", "train_loss", "val_loss", each
        a list holding one value per completed epoch.

    Side effects: writes ``<model_name>_best.pt`` whenever the validation loss
    improves and ``<model_name>_last.pt`` at the end (both in ``models_dir``),
    then calls ``plot_metrics``. Reads the notebook globals ``device``,
    ``train_loader`` and ``val_loader``.

    NOTE(review): the recorded evaluation output shows VGG16 at 3.39%, about
    chance level for 29 classes. If that run used this function, a smaller
    ``lr`` for that model is worth trying -- confirm by re-running.
    """
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    # No `verbose` argument: it is deprecated in recent PyTorch releases.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3)

    best_val_loss = float('inf')
    no_improve_epochs = 0

    train_acc, val_acc, train_loss, val_loss = [], [], [], []

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        total, correct, running_loss = 0, 0, 0

        for imgs, labels in tqdm(train_loader, desc=f"{model_name} - Epoch {epoch+1}"):
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, preds = outputs.max(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_loss_epoch = running_loss / len(train_loader)
        train_acc_epoch = 100 * correct / total
        train_loss.append(train_loss_epoch)
        train_acc.append(train_acc_epoch)

        # ---- validation pass ----
        model.eval()
        correct, total, val_loss_epoch = 0, 0, 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                val_loss_epoch += loss.item()
                _, preds = outputs.max(1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

        val_loss_epoch /= len(val_loader)
        val_acc_epoch = 100 * correct / total
        val_loss.append(val_loss_epoch)
        val_acc.append(val_acc_epoch)

        print(f"Epoch {epoch+1}: Train Acc={train_acc_epoch:.2f}%, Val Acc={val_acc_epoch:.2f}%, Val Loss={val_loss_epoch:.4f}")

        scheduler.step(val_loss_epoch)

        # Early stopping: checkpoint on improvement, otherwise count towards patience.
        if val_loss_epoch < best_val_loss:
            best_val_loss = val_loss_epoch
            no_improve_epochs = 0
            torch.save(model.state_dict(), os.path.join(models_dir, f"{model_name}_best.pt"))
        else:
            no_improve_epochs += 1
            if no_improve_epochs >= early_stop_patience:
                print(f"⛔ Early stopping at epoch {epoch+1}")
                break

    torch.save(model.state_dict(), os.path.join(models_dir, f"{model_name}_last.pt"))
    plot_metrics(train_acc, val_acc, train_loss, val_loss, model_name)

    return {
        "train_acc": train_acc,
        "val_acc": val_acc,
        "train_loss": train_loss,
        "val_loss": val_loss,
    }


In [23]:
def evaluate_model(model, model_name, model_path):
    """Load a checkpoint, score it on ``val_loader`` and save a confusion matrix.

    Args:
        model: network with the same architecture as the saved state dict.
        model_name: used in the printed summary, plot title and file name.
        model_path: path to a state-dict file written by ``torch.save``.

    Returns:
        Validation accuracy in percent.

    Side effects: prints the accuracy and writes
    ``<model_name>_confusion_matrix.png`` to ``graphs_dir``. Reads the notebook
    globals ``device``, ``val_loader`` and ``full_dataset``.
    """
    model.to(device)
    # map_location lets a checkpoint saved on a GPU session load on a CPU-only
    # session (and the reverse) instead of failing while deserialising.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    all_preds = []
    all_labels = []
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = outputs.max(1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    accuracy = 100 * correct / total
    print(f"{model_name} Validation Accuracy: {accuracy:.2f}%")

    class_names = full_dataset.classes
    # Passing every class index keeps the matrix shape aligned with
    # display_labels even if some class never occurs in labels or predictions.
    cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_names))))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

    # Draw onto our own axes: without `ax`, disp.plot() opens a second figure,
    # leaving the 12x12 one empty and unclosed and saving a default-sized plot.
    fig, ax = plt.subplots(figsize=(12, 12))
    disp.plot(ax=ax, cmap=plt.cm.Blues, xticks_rotation='vertical')
    ax.set_title(f"{model_name} Confusion Matrix")
    fig.savefig(os.path.join(graphs_dir, f"{model_name}_confusion_matrix.png"))
    plt.close(fig)

    return accuracy


In [25]:
# ImageNet-pretrained backbones, keyed by the name used for checkpoints and plots.
models_to_train = {
    "AlexNet": models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1),
    "VGG16": models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1),
    "GoogLeNet": models.googlenet(weights=models.GoogLeNet_Weights.IMAGENET1K_V1),
    "ViT": vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_V1),
}

# Replace each network's final linear layer with a fresh one that emits
# `num_classes` logits; the attribute holding that layer differs per architecture.
for name, model in models_to_train.items():
    if name == "GoogLeNet":
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif name == "ViT":
        model.heads.head = nn.Linear(model.heads.head.in_features, num_classes)
    else:
        # AlexNet and VGG16 keep their head as the last entry of `classifier`.
        model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, num_classes)


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to C:\Users\VR/.cache\torch\hub\checkpoints\alexnet-owt-7be5be79.pth


100.0%


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\VR/.cache\torch\hub\checkpoints\vgg16-397923af.pth


100.0%


Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to C:\Users\VR/.cache\torch\hub\checkpoints\googlenet-1378be20.pth


100.0%


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to C:\Users\VR/.cache\torch\hub\checkpoints\vit_b_16-c867db91.pth


100.0%


In [None]:
# Train every registered architecture in turn; train_model writes the
# checkpoints and metric plots for each one.
for model_name in models_to_train:
    model = models_to_train[model_name]
    print(f"\nStarting training for {model_name}...")
    train_model(model, model_name)
print("\nAll training complete!")


In [31]:
# Score each architecture using the best checkpoint saved during training.
for model_name in models_to_train:
    model = models_to_train[model_name]
    print(f"\nEvaluating {model_name}...")
    best_model_path = os.path.join(models_dir, f"{model_name}_best.pt")
    evaluate_model(model, model_name, best_model_path)
print("Evaluation complete!")



Evaluating AlexNet...
AlexNet Validation Accuracy: 99.98%

Evaluating VGG16...
VGG16 Validation Accuracy: 3.39%

Evaluating GoogLeNet...
GoogLeNet Validation Accuracy: 100.00%

Evaluating ViT...
ViT Validation Accuracy: 99.91%
Evaluation complete!


<Figure size 1200x1200 with 0 Axes>

<Figure size 1200x1200 with 0 Axes>

<Figure size 1200x1200 with 0 Axes>

<Figure size 1200x1200 with 0 Axes>