In [None]:
import os
import time

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

# Pick the compute backend in order of preference: MPS, then CUDA, then CPU.
device = torch.device(
    'mps' if torch.backends.mps.is_available()
    else 'cuda' if torch.cuda.is_available()
    else 'cpu'
)

print(f"Using device: {device}")

# Hyperparameters

In [None]:
# Settings read by the data loaders, the model constructor and the training loop.
num_epochs = 50                 # number of passes over the training data
batch_size = 64                 # samples per batch handed to the DataLoaders
hidden_sizes = [512, 256, 128]  # width of each hidden layer, input side first

In [None]:
# Data transformation: convert to tensors, then shift/scale every channel
# with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-10 dataset (downloaded into ./data on first use)
full_train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into training and validation sets (80% train, 20% validation).
# The split is driven by a dedicated, seeded generator so that every kernel
# restart produces the same partition and runs stay comparable.
SPLIT_SEED = 42
train_size = int(0.8 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
assert len(train_dataset) + len(val_dataset) == len(full_train_dataset)

# Data loaders: only the training loader is shuffled
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# CIFAR-10 classes, indexed by label id
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
class FullyConnectedNN(nn.Module):
    """Fully connected classifier: flatten, Linear+ReLU stages, linear output.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_sizes: Sequence with the width of each hidden layer, in order.
            May be empty, in which case the network is a single linear layer
            from ``input_size`` to ``num_classes``.
        num_classes: Number of output logits.

    The layers are stored in ``self.linear_layers`` in the order
    ``Linear, ReLU, ..., Linear``; the final layer has no activation, so the
    forward pass returns raw logits.
    """

    def __init__(self, input_size, hidden_sizes, num_classes):
        super().__init__()

        # Flatten layer to convert each sample to a 1D vector
        self.flatten = nn.Flatten()

        # widths[i] -> widths[i + 1] describes every Linear+ReLU stage; with no
        # hidden sizes the list holds only the input width and the loop is skipped.
        widths = [input_size, *hidden_sizes]

        layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())

        # Output layer (no activation)
        layers.append(nn.Linear(widths[-1], num_classes))

        # Sequential container
        self.linear_layers = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` per sample and return the logits."""
        return self.linear_layers(self.flatten(x))

In [None]:
# Model dimensions: one logit per CIFAR-10 class, and a flattened
# 3-channel 32x32 image as input.
num_classes = 10
input_size = 32 * 32 * 3

# Build the network, then move its parameters to the selected device
model = FullyConnectedNN(input_size, hidden_sizes, num_classes)
model = model.to(device)

In [None]:
def evaluate_model(model, data_loader, criterion, device):
    """Compute the mean loss and the accuracy of ``model`` over ``data_loader``.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        data_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            tensor. The loss is assumed to be averaged over the batch (the
            default ``reduction='mean'``).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy)`` where ``loss`` is the per-sample mean loss
        and ``accuracy`` is a percentage in ``[0, 100]``.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            n_samples = labels.size(0)

            # Weight each batch's mean loss by its size so that a smaller
            # final batch does not get the same influence as a full one.
            loss_sum += criterion(outputs, labels).item() * n_samples
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += n_samples

    if total == 0:
        raise ValueError("data_loader yielded no samples; cannot compute metrics")

    loss = loss_sum / total
    accuracy = 100 * correct / total

    return loss, accuracy

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: Module to optimise; it is updated in place.
        train_loader: Sized iterable yielding ``(images, labels)`` batches.
        val_loader: Loader passed to ``evaluate_model`` after every epoch.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            tensor. The loss is assumed to be averaged over the batch (the
            default ``reduction='mean'``).
        optimizer: Optimizer holding the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to.

    Returns:
        Dict with the keys ``'train_losses'``, ``'train_accuracies'``,
        ``'val_losses'`` and ``'val_accuracies'``; each maps to a list with
        one entry per epoch (accuracies are percentages).

    Raises:
        ValueError: If ``train_loader`` yields no samples during an epoch.
    """
    # Per-epoch metrics
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []

    # Track wall-clock time of the whole run
    start_time = time.time()

    for epoch in range(num_epochs):
        # evaluate_model() switches to eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()

        loss_sum = 0.0  # sum of per-sample losses seen so far in this epoch
        correct = 0
        total = 0

        progress_bar = tqdm(
            train_loader,
            total=len(train_loader),
            desc=f'Epoch {epoch+1}/{num_epochs}',
        )

        for images, labels in progress_bar:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by the batch size so the running
            # value is a true per-sample mean even when batch sizes differ.
            n_samples = labels.size(0)
            loss_sum += loss.item() * n_samples
            total += n_samples
            correct += (outputs.argmax(dim=1) == labels).sum().item()

            progress_bar.set_postfix_str(
                f'loss: {loss_sum / total:.4f} - acc: {100 * correct / total:.2f}%'
            )

        if total == 0:
            raise ValueError("train_loader yielded no samples; cannot train")

        epoch_loss = loss_sum / total
        epoch_acc = 100 * correct / total
        train_losses.append(epoch_loss)
        train_accuracies.append(epoch_acc)

        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)
        val_losses.append(val_loss)
        val_accuracies.append(val_acc)

        print(f'Epoch {epoch+1}/{num_epochs} - '
              f'loss: {epoch_loss:.4f} - acc: {epoch_acc:.2f}% - '
              f'val_loss: {val_loss:.4f} - val_acc: {val_acc:.2f}%')

    elapsed_time = time.time() - start_time
    print(f'Training completed in {elapsed_time:.2f} seconds')

    return {
        'train_losses': train_losses,
        'train_accuracies': train_accuracies,
        'val_losses': val_losses,
        'val_accuracies': val_accuracies
    }

In [None]:
def test_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            # Per-class accuracy
            c = (predicted == labels).squeeze()
            for i in range(labels.size(0)):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
    
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    
    # Print per-class accuracy
    for i in range(10):
        print(f'Accuracy of {classes[i]}: {100 * class_correct[i] / class_total[i]:.2f}%')
    
    return accuracy

In [None]:
# Plot learning curves
def plot_learning_curves(history, save_to='learning_curves.png', title='Learning Curves'):
    """Plot training/validation loss and accuracy side by side and save the figure.

    Args:
        history: Dict with the keys ``'train_losses'``, ``'val_losses'``,
            ``'train_accuracies'`` and ``'val_accuracies'``, each a sequence
            with one value per epoch.
        save_to: Destination passed to ``savefig``. When it is a path, any
            missing parent directory is created first.
        title: Prefix used in both subplot titles.
    """
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

    def _plot(ax, values, label):
        # Number epochs from 1 so the x-axis matches the training log.
        ax.plot(range(1, len(values) + 1), values, label=label)

    # Plot loss
    _plot(loss_ax, history['train_losses'], 'Training Loss')
    _plot(loss_ax, history['val_losses'], 'Validation Loss')
    loss_ax.set_xlabel('Epochs')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_title(f'{title} - Loss Curves')
    loss_ax.legend()

    # Plot accuracy
    _plot(acc_ax, history['train_accuracies'], 'Training Accuracy')
    _plot(acc_ax, history['val_accuracies'], 'Validation Accuracy')
    acc_ax.set_xlabel('Epochs')
    acc_ax.set_ylabel('Accuracy (%)')
    acc_ax.set_title(f'{title} - Accuracy Curves')
    acc_ax.legend()

    fig.tight_layout()

    # savefig does not create directories; make sure the target one exists.
    if isinstance(save_to, (str, os.PathLike)):
        target_dir = os.path.dirname(save_to)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
    fig.savefig(save_to)

    plt.show()
    # Release the figure explicitly: this function is called once per
    # experiment and open figures would otherwise accumulate.
    plt.close(fig)

In [None]:
def experiment(
    model, 
    train_loader, 
    val_loader, 
    criterion, 
    optimizer, 
    num_epochs, 
    device, 
    model_path="model/cifar10_fcnn.pth",
    learining_curves_path="learning_curves.png",
    title="Learning Curves",
    weight_decay=0.0,  # L2 regularization
    test_loader=None
):
    """Train ``model``, evaluate it, plot its learning curves and save its weights.

    Args:
        model: Module to train; it is updated in place.
        train_loader: Loader with the training batches.
        val_loader: Loader used for per-epoch validation.
        criterion: Loss callable forwarded to ``train_model``.
        optimizer: Optimizer holding the parameters of ``model``.
        num_epochs: Number of training epochs.
        device: Device used for training and evaluation.
        model_path: File the model ``state_dict`` is written to.
        learining_curves_path: File the learning-curve figure is written to
            (the misspelled name is kept so existing keyword calls keep working).
        title: Title prefix for the learning-curve plots.
        weight_decay: L2 penalty written into every optimizer parameter group
            that exposes a ``'weight_decay'`` option. It overrides whatever
            value the optimizer was constructed with.
        test_loader: Optional loader for the final evaluation. When omitted,
            the final evaluation falls back to ``val_loader``.

    Returns:
        Dict with ``'history'`` (the dict returned by ``train_model``) and
        ``'test_accuracy'`` (the percentage returned by ``test_model``).
    """
    # Apply the penalty to all parameter groups of any optimizer that supports
    # it, rather than only to the first group of SGD/Adam instances.
    for group in optimizer.param_groups:
        if 'weight_decay' in group:
            group['weight_decay'] = weight_decay

    # Create the output directories up front so a missing folder is not
    # discovered only after the whole training run has finished.
    for output_path in (model_path, learining_curves_path):
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    # Train the model
    history = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)

    # Final evaluation; test_model prints the overall and per-class accuracy itself
    eval_loader = val_loader if test_loader is None else test_loader
    test_accuracy = test_model(model, eval_loader, device)

    # Plot learning curves
    plot_learning_curves(history, learining_curves_path, title)

    # Save the model
    torch.save(model.state_dict(), model_path)
    print(f"Model saved to {model_path}")

    return {'history': history, 'test_accuracy': test_accuracy}

In [None]:
# Experiment grid: every loss function x optimizer configuration x weight decay.
# Each run gets a freshly initialised model and a freshly built optimizer, so no
# weights or optimizer state carry over from one run to the next.


class OneHotTargetLoss(nn.Module):
    """Adapt an element-wise loss to integer class labels.

    ``nn.MSELoss`` and ``nn.BCEWithLogitsLoss`` compare the model output with
    a target of the same shape, so the integer labels coming from the data
    loaders are one-hot encoded and cast to the output dtype before the
    wrapped loss is called.
    """

    def __init__(self, base_loss, num_classes):
        super().__init__()
        self.base_loss = base_loss
        self.num_classes = num_classes

    def forward(self, outputs, labels):
        targets = nn.functional.one_hot(labels, num_classes=self.num_classes)
        return self.base_loss(outputs, targets.to(outputs.dtype))


loss_functions = [
    nn.CrossEntropyLoss(),
    nn.MSELoss(),
    nn.BCEWithLogitsLoss()
]

# Losses in `loss_functions` that need targets shaped like the logits.
ELEMENTWISE_LOSSES = (nn.MSELoss, nn.BCEWithLogitsLoss)

# One entry per optimisation strategy:
#   tag        - file-name-safe identifier, unique per entry so that output
#                files of different strategies never overwrite each other
#   batch_size - batch, online and mini-batch gradient descent differ only in
#                how many samples feed each update, so each entry sets its own
#   build      - factory creating a new optimizer for the given parameters
# NOTE: 'Online GD (SGD)' performs one update per sample and 'Batch GD' loads
# the whole training split as a single batch; both are slow on CIFAR-10.
optimizer_configs = {
    'Batch GD': {
        'tag': 'batch_gd',
        'batch_size': len(train_dataset),
        'build': lambda params, wd: optim.SGD(params, lr=0.01, momentum=0, weight_decay=wd),
    },
    'Online GD (SGD)': {
        'tag': 'online_gd',
        'batch_size': 1,
        'build': lambda params, wd: optim.SGD(params, lr=0.01, weight_decay=wd),
    },
    'Mini-Batch GD': {
        'tag': 'minibatch_gd',
        'batch_size': batch_size,
        'build': lambda params, wd: optim.SGD(params, lr=0.01, momentum=0, weight_decay=wd),
    },
    'Momentum': {
        'tag': 'momentum',
        'batch_size': batch_size,
        'build': lambda params, wd: optim.SGD(params, lr=0.01, momentum=0.9, weight_decay=wd),
    },
    'Adagrad': {
        'tag': 'adagrad',
        'batch_size': batch_size,
        'build': lambda params, wd: optim.Adagrad(params, lr=0.01, weight_decay=wd),
    },
    'Adam': {
        'tag': 'adam',
        'batch_size': batch_size,
        'build': lambda params, wd: optim.Adam(params, lr=0.001, weight_decay=wd),
    },
    'Adamax': {
        'tag': 'adamax',
        'batch_size': batch_size,
        'build': lambda params, wd: optim.Adamax(params, lr=0.001, weight_decay=wd),
    },
}

weight_decay_values = [0.0, 1e-4, 1e-3]

model_path = "model/cifar10_fcnn"
learining_curves_path = "result/learning_curves"

# Both prefixes point into sub-directories that may not exist yet.
for output_prefix in (model_path, learining_curves_path):
    output_dir = os.path.dirname(output_prefix)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

for base_loss in loss_functions:
    loss_name = base_loss.__class__.__name__
    if isinstance(base_loss, ELEMENTWISE_LOSSES):
        loss_fn = OneHotTargetLoss(base_loss, num_classes)
    else:
        loss_fn = base_loss

    for optimizer_name, config in optimizer_configs.items():
        # Training loader with the batch size that defines this strategy
        run_train_loader = torch.utils.data.DataLoader(
            dataset=train_dataset,
            batch_size=config['batch_size'],
            shuffle=True,
        )

        for weight_decay in weight_decay_values:
            print(f"\nRunning experiment with {loss_name}, {optimizer_name}, weight_decay={weight_decay}")

            learning_curve_title = f'{loss_name} - {optimizer_name} - WD={weight_decay}'
            run_name = f"{loss_name}_{config['tag']}_wd{weight_decay}"

            # New weights and new optimizer state for every run
            run_model = FullyConnectedNN(input_size, hidden_sizes, num_classes).to(device)
            run_optimizer = config['build'](run_model.parameters(), weight_decay)

            experiment(
                run_model,
                run_train_loader,
                val_loader,
                criterion=loss_fn,
                optimizer=run_optimizer,
                num_epochs=num_epochs,
                device=device,
                model_path=f"{model_path}_{run_name}.pth",
                learining_curves_path=f"{learining_curves_path}_{run_name}.png",
                title=learning_curve_title,
                weight_decay=weight_decay
            )