In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
from torch.utils.tensorboard import SummaryWriter
import torchvision.transforms.functional as TF
from torchvision.transforms import AutoAugmentPolicy, AutoAugment
from torchvision.transforms.v2 import MixUp
from torch.utils.tensorboard import SummaryWriter
import random
import numpy as np
from datetime import datetime
import os
from tqdm import tqdm

2024-11-18 02:20:49.479165: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
print(torch.__version__)

2.4.0+cu121


In [3]:
# Set random seed for reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [4]:
def evaluate_nn(model, test_loader, criterion, device):
    """
    Function to evaluate the neural network on the test data.
    """
    model.eval()
    running_loss = 0.0
    correct_labels = 0
    total_labels = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            predicted_labels = torch.argmax(outputs, dim=1)
            total_labels += labels.size(0)
            correct_labels += torch.sum(predicted_labels == labels).item()

    test_loss = running_loss / len(test_loader)
    test_accuracy = correct_labels / total_labels
    return test_loss, test_accuracy

In [5]:
def train_nn(model, train_loader, criterion, optimizer, device):
    """Function to train the neural network for one epoch"""
    model.train()
    running_loss = 0.0
    correct_labels = 0
    total_labels = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        predicted_labels = torch.argmax(outputs, dim=1)
        total_labels += labels.size(0)
        correct_labels += torch.sum(predicted_labels == labels).item()

    train_loss = running_loss / len(train_loader)
    train_accuracy = correct_labels / total_labels
    
    return train_loss, train_accuracy

In [25]:
def run_training_resnet50(lr, num_epochs, train_loader, test_loader):
    """
    Fine-tune ResNet-50 for binary classification, logging training details to TensorBoard.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load the pre-trained ResNet-50 model
    model = models.resnet50(pretrained=True)
    
    # Modify the final fully connected layer for binary classification
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)  # 2 output classes
    
    model = model.to(device)
    
    criterion = nn.CrossEntropyLoss()             # Loss function
    optimizer = optim.Adam(model.parameters(), lr=lr)  # Optimizer
    
    # TensorBoard writer
    writer = SummaryWriter()

    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []

    # Training loop
    for epoch in tqdm(range(num_epochs)):
        train_loss, train_accuracy = train_nn(model, train_loader, criterion, optimizer, device)
        test_loss, test_accuracy = evaluate_nn(model, test_loader, criterion, device)

        train_losses.append(train_loss)
        test_losses.append(test_loss)
        
        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        # Write to TensorBoard
        writer.add_scalar("Loss/train", train_loss, epoch)
        writer.add_scalar("Loss/test", test_loss, epoch)
        writer.add_scalar("Accuracy/train", train_accuracy, epoch)
        writer.add_scalar("Accuracy/test", test_accuracy, epoch)
        
        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}")
        print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_accuracy:.4f}")

    # Close the TensorBoard writer
    writer.close()

    # Returns the model and metrics
    return model, train_losses, train_accuracies, test_losses, test_accuracies

In [None]:
def run_training_resnet50(lr, num_epochs, train_loader, test_loader):
    """
    Fine-tune ResNet-50 for binary classification with Cosine Annealing LR scheduling, logging details to TensorBoard.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load the pre-trained ResNet-50 model
    model = models.resnet50(pretrained=True)
    
    # Modify the final fully connected layer for binary classification
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)  # 2 output classes
    
    model = model.to(device)
    
    criterion = nn.CrossEntropyLoss()                   # Loss function
    optimizer = optim.Adam(model.parameters(), lr=lr)   # Optimizer
    
    # Initialize the cosine annealing scheduler
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

    # TensorBoard writer
    writer = SummaryWriter()

    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []

    # Training loop
    for epoch in tqdm(range(num_epochs)):
        train_loss, train_accuracy = train_nn(model, train_loader, criterion, optimizer, device)
        test_loss, test_accuracy = evaluate_nn(model, test_loader, criterion, device)

        # Step the scheduler
        scheduler.step()

        # Log training metrics
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        
        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        # Log metrics to TensorBoard
        writer.add_scalar("Loss/train", train_loss, epoch)
        writer.add_scalar("Loss/test", test_loss, epoch)
        writer.add_scalar("Accuracy/train", train_accuracy, epoch)
        writer.add_scalar("Accuracy/test", test_accuracy, epoch)

        # Log the current learning rate to TensorBoard
        current_lr = scheduler.get_last_lr()[0]
        writer.add_scalar("Learning Rate", current_lr, epoch)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}")
        print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_accuracy:.4f}")
        print(f"Learning Rate: {current_lr:.6f}")

    # Close the TensorBoard writer
    writer.close()

    # Returns the model and metrics
    return model, train_losses, train_accuracies, test_losses, test_accuracies

In [17]:
def run_training_resnet50(lr, num_epochs, train_loader, test_loader):
    """
    Fine-tune ResNet-50 for binary classification with Cosine Annealing LR scheduling, MixUp, and logging details to TensorBoard.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load the pre-trained ResNet-50 model
    model = models.resnet50(pretrained=True)
    
    # Modify the final fully connected layer for binary classification
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 2)  # 2 output classes
    
    model = model.to(device)
    
    criterion = nn.CrossEntropyLoss()                   # Loss function
    optimizer = optim.Adam(model.parameters(), lr=lr)   # Optimizer
    
    # Initialize Cosine Annealing Scheduler
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    
    # Initialize MixUp transform using torchvision or an equivalent library
    mixup_transform = MixUp(num_classes=2, alpha=1.0)
    
    # TensorBoard writer
    writer = SummaryWriter()

    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []

    # Training loop
    for epoch in tqdm(range(num_epochs)):
        model.train()
        running_loss = 0.0
        correct_labels = 0
        total_labels = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Apply MixUp transformation if available in transforms
            inputs, labels = mixup_transform(inputs, labels)
            # print(labels)
            
            optimizer.zero_grad()
            outputs = model(inputs)

            # Calculate mixup loss
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted_labels = torch.argmax(outputs, dim=1)
            # total_labels += labels.size(0)
            # correct_labels += (lam * (predicted_labels == labels_a).sum().item() +
            #                    (1 - lam) * (predicted_labels == labels_b).sum().item())

        # Calculate average train loss and accuracy
        train_loss = running_loss / len(train_loader)
        # train_accuracy = correct_labels / total_labels

        # Evaluate on test data
        test_loss, test_accuracy = evaluate_nn(model, test_loader, criterion, device)

        # Step the scheduler
        scheduler.step()

        # Log metrics
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        
        # train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        # Log metrics to TensorBoard
        writer.add_scalar("Loss/train", train_loss, epoch)
        writer.add_scalar("Loss/test", test_loss, epoch)
        # writer.add_scalar("Accuracy/train", train_accuracy, epoch)
        writer.add_scalar("Accuracy/test", test_accuracy, epoch)

        # Log the current learning rate to TensorBoard
        current_lr = scheduler.get_last_lr()[0]
        writer.add_scalar("Learning Rate", current_lr, epoch)

        print(f"Epoch {epoch+1}/{num_epochs}")
        # print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}")
        print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_accuracy:.4f}")
        print(f"Learning Rate: {current_lr:.6f}")

    # Close the TensorBoard writer
    writer.close()

    # Returns the model and metrics
    return model, train_losses, train_accuracies, test_losses, test_accuracies

In [7]:
data_dir = '/home/ahmad/courses/cuda_lab/MA-INF-4308-Lab-Vision-Systems/Assignment-3/dataset'
log_dir = './runs/human_robot_classifier_' + datetime.now().strftime('%Y%m%d_%H%M%S')

In [8]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
# Set random seed
set_seed()

In [9]:
# Define transformations
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.RandomApply([
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
    ], p=0.8),
    # AutoAugment(AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [10]:
# Load datasets
train_dataset = datasets.ImageFolder(os.path.join(data_dir, 'train'), train_transforms)
val_dataset = datasets.ImageFolder(os.path.join(data_dir, 'val'), val_transforms)

In [11]:
train_dataset.classes, val_dataset.classes

(['person', 'robot'], ['person', 'robot'])

In [12]:
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

In [13]:
# Initialize Mixup transform
mixup_transform = MixUp(num_classes=2, alpha=1.0)

### Finetuning

In [14]:
# Load pre-trained ResNet50
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)  # 2 classes: human and robot

In [19]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

In [20]:
# Initialize tensorboard writer
writer = SummaryWriter(log_dir)

In [15]:
lr=0.001
num_epochs=10

In [18]:
# https://www.kaggle.com/code/ar2017/pytorch-efficientnet-train-aug-cutmix-fmix --> see this for cutmix
run_training_resnet50(lr, num_epochs, train_loader, val_loader)

  0%|                                                                                                                                                            | 0/10 [01:28<?, ?it/s]


NameError: name 'train_accuracy' is not defined

In [22]:
# Train the model
num_epochs = 20
train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, mixup_transform, num_epochs, writer)

writer.close()

RuntimeError: The size of tensor a (32) must match the size of tensor b (2) at non-singleton dimension 1