In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Set seed for PyTorch (CPU & CUDA)
torch.manual_seed(42)
torch.cuda.manual_seed(42)
torch.cuda.manual_seed_all(42)  # For multi-GPU

import json
import matplotlib.pyplot as plt

from mobilenet import MobileNet
from data import get_train_valid_loader, get_test_loader

In [2]:
def train_func(model, optimizer, criterion, train_loader, device, epoch):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct / total

    return train_loss, train_accuracy

def eval(model, criterion, test_loader, device):
    # Evaluate the model on the validation set
    model.eval()
    valid_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            valid_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    valid_loss = valid_loss / len(valid_loader)
    valid_accuracy = 100 * correct / total

    return valid_loss, valid_accuracy


In [3]:
batch_size = 128
data_dir = './data'
train_loader, valid_loader = get_train_valid_loader(data_dir=data_dir,batch_size=batch_size,augment=True,random_seed=42)
test_loader = get_test_loader(data_dir=data_dir, batch_size=batch_size)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

momentum = 0.9


In [None]:
# Hyperparameters for the new experiment
num_epochs = 300
learning_rate = 0.05
weight_decays = [5e-4, 1e-4]

# Function to train with weight decay and cosine annealing learning rate
def train_weight_decay_cosine_annealing(model, initial_lr, weight_decay, criterion, train_loader, valid_loader, device, num_epochs):
    optimizer = optim.SGD(model.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    train_losses = []
    valid_losses = []
    train_accuracies = []
    valid_accuracies = []

    for epoch in range(num_epochs):
        train_loss, train_accuracy = train_func(model, optimizer, criterion, train_loader, device, epoch)
        valid_loss, valid_accuracy = eval(model, criterion, valid_loader, device)

        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        train_accuracies.append(train_accuracy)
        valid_accuracies.append(valid_accuracy)

        scheduler.step()
        
        # Get the current learning rate
        current_lr = scheduler.get_last_lr()[0]  # Extract current learning rate
        if epoch % 10 == 0 or epoch == num_epochs-1:
            print(f"Epoch [{epoch+1}/{num_epochs}], "
                  f"Train Loss: {train_loss:.4f}, "
                  f"Train Acc: {train_accuracy:.2f}%, "
                  f"Valid Loss: {valid_loss:.4f}, "
                  f"Valid Acc: {valid_accuracy:.2f}%, "
                  f"Learning Rate: {current_lr:.6f}")

    return train_losses, valid_losses, train_accuracies, valid_accuracies
    
# Experiment: Train for 300 epochs with weight decay and cosine annealing learning rate
# MobileNet model

for weight_decay in weight_decays:
    print(f"Experiment: Training with weight decay {weight_decay} and cosine annealing learning rate")
    
    model = MobileNet(num_classes=100, sigmoid_block_ind = [4,5,6,7,8,9,10]).to(device) # USING SIGMOID 
    
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    
    train_losses, valid_losses, train_accuracies, valid_accuracies = train_weight_decay_cosine_annealing(
        model, learning_rate, weight_decay, criterion, train_loader, valid_loader, device, num_epochs)
    
    plt.figure(figsize=(6, 4))
    plt.plot(range(len(train_losses)), train_losses)
    plt.plot(range(len(valid_losses)), valid_losses)
    plt.xlabel("Number of epochs")
    plt.ylabel("Loss")
    plt.title("MobileNet with Weight Decay and Cosine Annealing LR: Loss vs Number of epochs")
    plt.legend(['train', 'valid'])
    plt.savefig(f'images/sigmoid_weight_decay_cosine_annealing_loss_{weight_decay}.png')
    plt.show()
    
    plt.figure(figsize=(6, 4))
    plt.plot(range(len(train_accuracies)), train_accuracies)
    plt.plot(range(len(valid_accuracies)), valid_accuracies)
    plt.xlabel("Number of epochs")
    plt.ylabel("Accuracy")
    plt.title("MobileNet with Weight Decay and Cosine Annealing LR: Accuracy vs Number of epochs")
    plt.legend(['train', 'valid'])
    plt.savefig(f'images/sigmoid_weight_decay_cosine_annealing_accuracy_{weight_decay}.png')
    plt.show()
    
    
    learning_curves = {"train_losses": train_losses,
                                    "valid_losses": valid_losses,
                                    "train_accuracies": train_accuracies,
                                    "valid_accuracies": valid_accuracies
                                        }
    with open(f'learning_curves/sigmoid_cosine_annealing_weight_decay_{weight_decay}.json', 'w') as fp:
        json.dump(learning_curves, fp)

Experiment: Training with weight decay 0.0005 and cosine annealing learning rate
Epoch [1/300], Train Loss: 4.2304, Train Acc: 5.42%, Valid Loss: 3.8526, Valid Acc: 9.19%, Learning Rate: 0.049999
Epoch [11/300], Train Loss: 2.0492, Train Acc: 43.84%, Valid Loss: 2.3076, Valid Acc: 39.09%, Learning Rate: 0.049834
Epoch [21/300], Train Loss: 1.7521, Train Acc: 50.82%, Valid Loss: 3.3615, Valid Acc: 26.01%, Learning Rate: 0.049398
Epoch [31/300], Train Loss: 1.6076, Train Acc: 54.28%, Valid Loss: 2.7026, Valid Acc: 32.12%, Learning Rate: 0.048694
Epoch [41/300], Train Loss: 1.4984, Train Acc: 56.88%, Valid Loss: 2.2101, Valid Acc: 42.92%, Learning Rate: 0.047731
Epoch [51/300], Train Loss: 1.4218, Train Acc: 58.83%, Valid Loss: 2.4895, Valid Acc: 38.29%, Learning Rate: 0.046519
Epoch [61/300], Train Loss: 1.3477, Train Acc: 60.55%, Valid Loss: 3.2364, Valid Acc: 27.10%, Learning Rate: 0.045070
Epoch [71/300], Train Loss: 1.2793, Train Acc: 62.50%, Valid Loss: 3.0323, Valid Acc: 31.95%, Le

In [None]:
!git add .
!git commit -m "Auto-commit after activation function experiment"
!git push origin main  # Change "main" to your branch name