<a href="https://colab.research.google.com/github/jogfx/ADL2024/blob/main/ADL1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import random
from torch.utils.data import DataLoader
import torch.optim.lr_scheduler as lr_scheduler

In [2]:
# Define transformations to be applied to the data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [3]:
# Choose CIFAR-10 or CIFAR-100
dataset_name = "CIFAR-10"

In [4]:
# Download and load the training set
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 39739003.90it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


In [5]:
# Download and load the test set
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified


In [6]:
# Define data augmentation transforms
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [7]:
# Create the training set with data augmentation
trainset_augmented = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)

Files already downloaded and verified


In [8]:
# Print the classes for reference
classes = trainset.classes
print(f'Classes: {classes}')

Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [9]:
# Hyperparameter variations
hidden_size1_values = [64, 128, 256, 512]
hidden_size2_values = [64, 128, 256, 512]
hidden_size3_values = [64, 128, 256, 512]
learning_rate_values = [0.008, 0.02, 0.005]
batch_size_values = [32, 64, 128]
epochs = [10, 20, 30]

In [10]:
# Parameters
input_size = 3 * 32 * 32  # CIFAR-10 with 3 channels and 32x32 pixels
output_size =  10  # number of classes in cifar10

In [11]:
# Number of random experiments
num_experiments = 5
num_epochs = random.choice(epochs)

In [12]:
for experiment_num in range(num_experiments):
    # Randomly select hyperparameters for this experiment
    hidden_size1 = random.choice(hidden_size1_values)
    hidden_size2 = random.choice(hidden_size2_values)
    hidden_size3 = random.choice(hidden_size3_values)
    lr = random.choice(learning_rate_values)

    # Model structure
    model = nn.Sequential(
        nn.Linear(input_size, hidden_size1),
        nn.BatchNorm1d(hidden_size1),
        nn.LeakyReLU(0.01),
        nn.Linear(hidden_size1, hidden_size2),
        nn.BatchNorm1d(hidden_size2),
        nn.LeakyReLU(0.01),
        nn.Linear(hidden_size2, hidden_size3),
        nn.BatchNorm1d(hidden_size3),
        nn.LeakyReLU(0.01),
        nn.Linear(hidden_size3, output_size)
    )

    # Add a custom weight initialization function
    def weights_init(m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)
            nn.init.zeros_(m.bias)

    # Apply the custom initialization to the model
    model.apply(weights_init)

    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Data loaders
    train_loader = DataLoader(trainset_augmented, batch_size=random.choice(batch_size_values), shuffle=True)
    test_loader = DataLoader(testset, batch_size=random.choice(batch_size_values), shuffle=False)

    # Training loop
    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        for inputs, labels in train_loader:
          inputs = inputs.view(inputs.size(0), -1)  # Flatten each input
          optimizer.zero_grad()  # Zero the gradients
          outputs = model(inputs)  # Forward pass
          loss = criterion(outputs, labels)  # Compute the loss
          loss.backward()  # Backward pass
          optimizer.step()  # Update weights

    # Validation loop
    model.eval()  # Set the model to evaluation mode
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.view(inputs.size(0), -1)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)

    accuracy = total_correct / total_samples
    print(f"Experiment {experiment_num + 1}: Hidden Size 1={hidden_size1}, Hidden Size 2={hidden_size2}, Hidden Size 3={hidden_size3}, Learning Rate={lr}, Epoch={epoch+1}")
    print(f"Test Accuracy: {accuracy}")

Experiment 1: Hidden Size 1=512, Hidden Size 2=64, Hidden Size 3=512, Learning Rate=0.02, Epoch=20
Test Accuracy: 0.5664
Experiment 2: Hidden Size 1=256, Hidden Size 2=64, Hidden Size 3=512, Learning Rate=0.005, Epoch=20
Test Accuracy: 0.5755
Experiment 3: Hidden Size 1=64, Hidden Size 2=256, Hidden Size 3=256, Learning Rate=0.02, Epoch=20
Test Accuracy: 0.5496
Experiment 4: Hidden Size 1=256, Hidden Size 2=512, Hidden Size 3=256, Learning Rate=0.005, Epoch=20
Test Accuracy: 0.5813
Experiment 5: Hidden Size 1=256, Hidden Size 2=128, Hidden Size 3=512, Learning Rate=0.02, Epoch=20
Test Accuracy: 0.5625
