In [None]:
# Fetch the course repository that contains the homework code used below.
# Note: git refuses to clone into an existing non-empty directory, so re-running
# this cell after a successful clone prints an error (the existing checkout is kept).
!git clone https://github.com/Berkeley-CS182/cs182fa25_public.git

In [None]:
# Work from the hw06 code directory so the `architectures` import below resolves
# (presumably architectures.py lives here -- confirm against the repository layout).
# The path is relative, so run this cell only once per kernel session.
%cd cs182fa25_public/hw06/code

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import wandb
from architectures import BasicConvNet, ResNet18, MLP
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

# Exploring Tensorboard
TensorBoard is a local tool for visualizing images, metrics, histograms, and more. It was designed for TensorFlow, but it also integrates with PyTorch. Let's explore TensorBoard usage with an example:

```python
from torch.utils.tensorboard import SummaryWriter

# To start a run, call the following
writer = SummaryWriter(comment=f'Name_of_Run')

# When you want to log a value, use the writer. When adding a scalar, the format is as follows: 
# add_scalar(tag, scalar_value, global_step=None, walltime=None, new_style=False, double_precision=False)
writer.add_scalar('Training Loss', loss.item(), step)

# Finally, when you are done logging values, close the writer
writer.close()


```
There are many other features and methods that you are free to explore, but they are not covered in this notebook.

## Your Task
We will once again be building classifiers for the CIFAR-10 dataset. Several architectures are set up for you in the `architectures.py` file. Using TensorBoard, please search through 5 different hyperparameter configurations. Examples of choices include: learning rate, batch size, architecture, optimization algorithm, etc. Please include the generated plots in your PDF submission and answer question A.

In [None]:
# Number of passes over the training set for every configuration.
# Kept small for demonstration; increase for better results.
epochs = 2

# Train on the GPU when PyTorch can see one, otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda") if cuda else torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Convert each image to a tensor, then normalize every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 train / test splits; downloaded into ./data on first use.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)


In [None]:
# The five hyperparameter configurations to compare in TensorBoard.
# Each entry provides:
#   name          - suffix used for the TensorBoard run and in log messages
#   model         - architecture to build: 'BasicConvNet', 'ResNet18' or 'MLP'
#   batch_size    - mini-batch size for the train and test loaders
#   learning_rate - optimizer step size
#   optimizer     - 'sgd' or 'adam'
#   weight_decay  - weight-decay coefficient passed to the optimizer
hyperparameters = [
    dict(name='basic_conv_sgd', model='BasicConvNet', batch_size=64,
         learning_rate=0.01, optimizer='sgd', weight_decay=1e-4),
    dict(name='basic_conv_adam', model='BasicConvNet', batch_size=64,
         learning_rate=0.001, optimizer='adam', weight_decay=1e-4),
    dict(name='resnet_adam', model='ResNet18', batch_size=32,
         learning_rate=0.001, optimizer='adam', weight_decay=1e-4),
    dict(name='mlp_adam', model='MLP', batch_size=128,
         learning_rate=0.001, optimizer='adam', weight_decay=1e-5),
    dict(name='mlp_sgd_large_batch', model='MLP', batch_size=256,
         learning_rate=0.01, optimizer='sgd', weight_decay=1e-4),
]

In [None]:
def _build_model(model_name):
    """Instantiate the architecture named by ``model_name``.

    Raises:
        ValueError: if ``model_name`` is not 'BasicConvNet', 'ResNet18' or 'MLP'.
    """
    if model_name == 'BasicConvNet':
        return BasicConvNet()
    if model_name == 'ResNet18':
        return ResNet18()
    if model_name == 'MLP':
        return MLP()
    raise ValueError(f"Unknown model: {model_name}")


def _build_optimizer(hp, model):
    """Create the optimizer selected by ``hp['optimizer']`` over ``model``'s parameters.

    Raises:
        ValueError: if ``hp['optimizer']`` is neither 'sgd' nor 'adam'.
    """
    if hp['optimizer'] == 'sgd':
        return optim.SGD(model.parameters(), lr=hp['learning_rate'],
                         momentum=0.9, weight_decay=hp['weight_decay'])
    if hp['optimizer'] == 'adam':
        return optim.Adam(model.parameters(), lr=hp['learning_rate'],
                          weight_decay=hp['weight_decay'])
    raise ValueError(f"Unknown optimizer: {hp['optimizer']}")


def train_model(hp):
    """Train and evaluate one hyperparameter configuration, logging to TensorBoard.

    For each of the ``epochs`` epochs the model is trained on ``trainset`` and
    then evaluated on ``testset``. Training loss/accuracy are logged every 100
    batches; test loss/accuracy are logged once per epoch.

    Args:
        hp: dict with the keys 'name', 'model', 'batch_size', 'learning_rate',
            'optimizer' and 'weight_decay'.

    Raises:
        ValueError: if ``hp['model']`` or ``hp['optimizer']`` is not recognized.
    """
    model_name = hp['model']

    # Build (and thereby validate) the model and optimizer first, so an invalid
    # configuration fails before an empty TensorBoard run directory is created.
    model = _build_model(model_name).to(device)
    optimizer = _build_optimizer(hp, model)
    criterion = nn.CrossEntropyLoss()

    # MLP expects flattened input
    flatten_inputs = model_name == 'MLP'

    # Set up data loaders
    batch_size = hp['batch_size']
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    # `trainset` / `testset` are shared by every configuration. Remember their
    # transforms so the ResNet18 resize below can be undone afterwards;
    # otherwise later runs (e.g. the MLP ones) would receive 224x224 images.
    saved_transforms = (trainset.transform, testset.transform)

    # Create TensorBoard writer
    writer = SummaryWriter(comment=f"_{hp['name']}")
    try:
        if model_name == 'ResNet18':
            # For ResNet18, we need to resize images to 224x224
            transform_224 = transforms.Compose([
                transforms.Resize(224),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])
            trainset.transform = transform_224
            testset.transform = transform_224

        for epoch in range(epochs):
            # ---- Training ----
            model.train()
            epoch_loss = 0.0   # summed over the whole epoch (progress bar)
            window_loss = 0.0  # summed over the current 100-batch logging window
            correct = 0
            total = 0

            train_loop = tqdm(trainloader, desc=f"Epoch {epoch+1}/{epochs} (Train)")
            for i, (inputs, labels) in enumerate(train_loop):
                inputs, labels = inputs.to(device), labels.to(device)

                if flatten_inputs:
                    inputs = inputs.view(inputs.size(0), -1)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # Statistics
                batch_loss = loss.item()
                epoch_loss += batch_loss
                window_loss += batch_loss
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

                # The progress bar shows the epoch-wide running average. It uses
                # its own accumulator so that resetting the logging window
                # below does not distort the displayed loss.
                train_loop.set_postfix({
                    'loss': epoch_loss / (i + 1),
                    'acc': 100. * correct / total
                })

                # Log to TensorBoard (every 100 batches). The loss is averaged
                # over the last 100 batches; the accuracy is cumulative over
                # the epoch so far.
                if i % 100 == 99:
                    step = epoch * len(trainloader) + i
                    writer.add_scalar('Training Loss', window_loss / 100, step)
                    writer.add_scalar('Training Accuracy', 100. * correct / total, step)
                    window_loss = 0.0

            # ---- Testing ----
            model.eval()
            test_loss = 0.0
            correct = 0
            total = 0

            with torch.no_grad():
                test_loop = tqdm(testloader, desc=f"Epoch {epoch+1}/{epochs} (Test)")
                for i, (inputs, labels) in enumerate(test_loop):
                    inputs, labels = inputs.to(device), labels.to(device)

                    if flatten_inputs:
                        inputs = inputs.view(inputs.size(0), -1)

                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    test_loss += loss.item()
                    _, predicted = outputs.max(1)
                    total += labels.size(0)
                    correct += predicted.eq(labels).sum().item()

                    test_loop.set_postfix({
                        'loss': test_loss / (i + 1),
                        'acc': 100. * correct / total
                    })

            # Log epoch statistics to TensorBoard (x-axis is the epoch index)
            writer.add_scalar('Test Loss', test_loss / len(testloader), epoch)
            writer.add_scalar('Test Accuracy', 100. * correct / total, epoch)
    finally:
        # Always undo the dataset mutation and flush/close the event file,
        # even if training is interrupted or raises.
        trainset.transform, testset.transform = saved_transforms
        writer.close()

    print(f"Training complete for {hp['name']}")

def run():
    """Train one model per entry of ``hyperparameters``, in list order."""
    for config in hyperparameters:
        banner = f"\nStarting training with configuration: {config['name']}"
        print(banner)
        train_model(config)

In [None]:
# Run the experiments: trains every configuration in `hyperparameters`, one after another.
run()


In [None]:
# View the TensorBoard results inline in the notebook.
# `runs` is the directory SummaryWriter writes to by default, relative to the
# current working directory. The same `tensorboard --logdir=runs` command can
# also be run from a terminal.

%load_ext tensorboard
%tensorboard --logdir=runs
