In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import numpy as np

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # For multi-GPU setups
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # Can slow down but ensures determinism

set_seed(42)

import json
import matplotlib.pyplot as plt

from mobilenet import MobileNet
from data import get_train_valid_loader, get_test_loader

In [3]:
def train_func(model, optimizer, criterion, train_loader, device, epoch):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct / total

    return train_loss, train_accuracy

def eval(model, criterion, valid_loader, device):
    # Evaluate the model on the validation set
    model.eval()
    valid_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            valid_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    valid_loss = valid_loss / len(valid_loader)
    valid_accuracy = 100 * correct / total

    return valid_loss, valid_accuracy


In [4]:
batch_size = 128
data_dir = './data'
train_loader, valid_loader = get_train_valid_loader(data_dir=data_dir,batch_size=batch_size,augment=True,random_seed=42)
test_loader = get_test_loader(data_dir=data_dir, batch_size=batch_size)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
momentum = 0.9


In [7]:
# Hyperparameters for the new experiments
num_epochs = 300
learning_rate = 0.05 # best learning rate from the previous experiment 

# Function to train with cosine annealing learning rate
def train_cosine_annealing(model, initial_lr, criterion, train_loader, valid_loader, device, num_epochs):
    optimizer = optim.SGD(model.parameters(), lr=initial_lr, momentum=momentum)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    train_losses = []
    valid_losses = []
    train_accuracies = []
    valid_accuracies = []
    learning_rates = []
    for epoch in range(num_epochs):
        train_loss, train_accuracy = train_func(model, optimizer, criterion, train_loader, device, epoch)
        valid_loss, valid_accuracy = eval(model, criterion, valid_loader, device)

        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        train_accuracies.append(train_accuracy)
        valid_accuracies.append(valid_accuracy)
        
        scheduler.step()
        
        # Get the current learning rate
        current_lr = scheduler.get_last_lr()[0]  # Extract current learning rate
        learning_rates.append(current_lr)
        if epoch % 10 == 0 or epoch == num_epochs-1:
            
            print(f"Epoch [{epoch+1}/{num_epochs}], "
                  f"Train Loss: {train_loss:.4f}, "
                  f"Train Acc: {train_accuracy:.2f}%, "
                  f"Valid Loss: {valid_loss:.4f}, "
                  f"Valid Acc: {valid_accuracy:.2f}%, "
                  f"Learning Rate: {current_lr:.6f}")

    return train_losses, valid_losses, train_accuracies, valid_accuracies, learning_rates
    
# Experiment 2: Train for 300 epochs with cosine annealing learning rate
print("Experiment 2: Training with cosine annealing learning rate")

model = MobileNet(num_classes=100, sigmoid_block_ind = []).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

train_losses, valid_losses, train_accuracies, valid_accuracies = train_cosine_annealing(
    model, learning_rate, criterion, train_loader, valid_loader, device, num_epochs)

plt.figure(figsize=(6, 4))
plt.plot(range(len(train_losses)), train_losses)
plt.plot(range(len(valid_losses)), valid_losses)
plt.xlabel("Number of epochs")
plt.ylabel("Loss")
plt.title("MobileNet with Consine Annealing LR")
plt.legend(['train', 'valid'])
plt.savefig('images/cosine_annealing_loss.png')
plt.show()

plt.figure(figsize=(6, 4))
plt.plot(range(len(train_accuracies)), train_accuracies)
plt.plot(range(len(valid_accuracies)), valid_accuracies)
plt.xlabel("Number of epochs")
plt.ylabel("Accuracy")
plt.title("MobileNet with Consine Annealing LR")
plt.legend(['train', 'valid'])
plt.savefig('images/cosine_annealing_accuracy.png')
plt.show()


learning_curves = {"train_losses": train_losses,
                    "valid_losses": valid_losses,
                    "train_accuracies": train_accuracies,
                    "valid_accuracies": valid_accuracies,
                   'learning_rates': learning_rates
                    }

with open(f'learning_curves/cosine_annealing.json', 'w') as fp:
    json.dump(learning_curves, fp)

Experiment 2: Training with cosine annealing learning rate
Epoch [1/300], Train Loss: 4.1285, Train Acc: 6.48%, Valid Loss: 3.7877, Valid Acc: 10.27%, Learning Rate: 0.049999
Epoch [11/300], Train Loss: 2.1332, Train Acc: 41.97%, Valid Loss: 2.3250, Valid Acc: 39.33%, Learning Rate: 0.049834
Epoch [21/300], Train Loss: 1.3774, Train Acc: 59.57%, Valid Loss: 1.9743, Valid Acc: 49.14%, Learning Rate: 0.049398
Epoch [31/300], Train Loss: 0.8732, Train Acc: 72.80%, Valid Loss: 2.0867, Valid Acc: 50.64%, Learning Rate: 0.048694
Epoch [41/300], Train Loss: 0.4658, Train Acc: 84.71%, Valid Loss: 2.3229, Valid Acc: 52.64%, Learning Rate: 0.047731
Epoch [51/300], Train Loss: 0.2514, Train Acc: 91.73%, Valid Loss: 2.5893, Valid Acc: 53.02%, Learning Rate: 0.046519
Epoch [61/300], Train Loss: 0.1417, Train Acc: 95.42%, Valid Loss: 2.8016, Valid Acc: 54.30%, Learning Rate: 0.045070
Epoch [71/300], Train Loss: 0.0842, Train Acc: 97.37%, Valid Loss: 2.9750, Valid Acc: 53.96%, Learning Rate: 0.043402

ValueError: too many values to unpack (expected 4)

In [6]:
!git add .
!git commit -m "Auto-commit after Consine Annealing experiment"
!git push origin main  # Change "main" to your branch name

Author identity unknown

*** Please tell me who you are.

Run

  git config --global user.email "you@example.com"
  git config --global user.name "Your Name"

to set your account's default identity.
Omit --global to set the identity only in this repository.

fatal: unable to auto-detect email address (got 'root@8ac860666ac8.(none)')
Everything up-to-date
