# CIFAR-10 Double Descent Experiments

**GRA4157 Big Data - Final Project**

Jakob Sverre Alexandersen

This notebook runs comprehensive double descent experiments on CIFAR-10:
1. **Model-wise Double Descent**: Training models of varying complexity
2. **Epoch-wise Double Descent**: Training a single model for many epochs


In [None]:
# One seed shared by every experiment so results can be reproduced.
SEED = 42

# Central place for every tunable value, grouped per experiment.
# `model_name` selects which model the epoch-wise run trains; 'Medium' was the
# alternative left commented out next to 'Baseline'.
CONFIG = dict(
    seed=SEED,
    model_wise=dict(
        num_epochs=100,
        learning_rate=0.001,
        results_dir='results/model_wise',
    ),
    epoch_wise=dict(
        model_name='Baseline',
        num_epochs=400,
        learning_rate=0.001,
        results_dir='results/epoch_wise',
    ),
)

# Echo the key settings so the notebook output records what was run.
print(f"Configuration loaded with seed: {SEED}")
print(f"Model-wise: {CONFIG['model_wise']['num_epochs']} epochs")
print(f"Epoch-wise: {CONFIG['epoch_wise']['num_epochs']} epochs")


In [None]:
import random
import warnings
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch

# Import experiment functions
from cifar10_experiments import (
    load_cifar10,
    get_class_names,
    run_model_wise_experiment,
    run_epoch_wise_experiment,
    plot_model_wise_double_descent,
    plot_epoch_wise_double_descent,
    plot_confusion_matrix,
    create_results_summary_table
)

# Seed every random number generator in play so a fresh "Restart & Run All"
# reproduces the same numbers: Python's built-in `random`, NumPy, and PyTorch.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    # Ask cuDNN for deterministic kernels and switch off its benchmark mode,
    # as recommended by the PyTorch reproducibility notes.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# NOTE(review): this hides *all* warnings for the rest of the session, including
# deprecation and numerical warnings raised during training. Consider narrowing
# it to specific categories once the noisy sources are known.
warnings.filterwarnings('ignore')

# Plot defaults used by every figure in the notebook.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

In [None]:
# Choose the compute device in order of preference: CUDA, then MPS, then CPU.
# `torch.backends.mps` is missing from older PyTorch releases, so it is looked
# up defensively; without the guard a CPU-only machine on such a release would
# raise AttributeError here instead of falling back to the CPU.
_mps_backend = getattr(torch.backends, "mps", None)
_mps_available = _mps_backend is not None and _mps_backend.is_available()

if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_available:
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

# Report details for the device that was actually selected, rather than
# re-querying the hardware a second time.
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
elif device.type == "mps":
    print("Using MPS")
else:
    print("Using CPU")


In [None]:
# Banner plus wall-clock timestamp marking the start of the data-loading stage.
banner = "=" * 80
print(banner)
print("loading cifar-10 dataset")
print(banner)
started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"start time: {started_at}")

# Load the six arrays returned by the project helper (presumably the
# train/validation/test features and labels, going by the names), passing the
# global seed through to it.
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
) = load_cifar10(seed=SEED)
class_names = get_class_names()

print(f"\nclass names: {class_names}")


## Experiment 1: Model-wise Double Descent

Train 4 models of varying complexity for 100 epochs each (the epoch count is set by `CONFIG['model_wise']['num_epochs']`).


In [None]:
print("experiment 1: model-wise double descent")

# Run the model-wise sweep with the hyperparameters from CONFIG['model_wise'].
# The result is indexed by model name; each entry exposes at least 'test_f1'
# and 'y_pred', which are the only keys read below.
model_wise_results = run_model_wise_experiment(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    num_epochs=CONFIG['model_wise']['num_epochs'],
    learning_rate=CONFIG['model_wise']['learning_rate'],
    results_dir=CONFIG['model_wise']['results_dir'],
    seed=SEED
)

# Generate visualizations
plot_model_wise_double_descent(model_wise_results)
create_results_summary_table(model_wise_results)

# Find the model with the highest test F1 and plot its confusion matrix.
# NOTE(review): the "best" model is chosen on a test-set metric, so its reported
# score is optimistically biased; consider selecting on a validation metric if
# the results expose one.
best_model_name = max(model_wise_results.keys(), key=lambda k: model_wise_results[k]['test_f1'])
best_model_results = model_wise_results[best_model_name]

confusion_matrix_path = f'report/figures/confusion_matrix_{best_model_name.lower().replace(" ", "_")}.pdf'
# Create the output folder up front so saving works on a fresh checkout, in
# case the plotting helper does not create missing directories itself.
Path(confusion_matrix_path).parent.mkdir(parents=True, exist_ok=True)

plot_confusion_matrix(
    y_test,
    best_model_results['y_pred'],
    class_names,
    save_path=confusion_matrix_path
)

print(f"\nbest model: {best_model_name} with F1 = {best_model_results['test_f1']:.4f}")


## Experiment 2: Epoch-wise Double Descent

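Train a single model (`CONFIG['epoch_wise']['model_name']`, currently `Baseline`) for 400 epochs (set by `CONFIG['epoch_wise']['num_epochs']`).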


In [None]:
print("experiment 2: epoch-wise DD")

# Pull the epoch-wise settings once instead of indexing CONFIG per argument.
epoch_wise_cfg = CONFIG['epoch_wise']

# Train the single configured model for the configured number of epochs.
epoch_wise_results = run_epoch_wise_experiment(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    seed=SEED,
    model_name=epoch_wise_cfg['model_name'],
    num_epochs=epoch_wise_cfg['num_epochs'],
    learning_rate=epoch_wise_cfg['learning_rate'],
    results_dir=epoch_wise_cfg['results_dir'],
)

# Plot the results of the epoch-wise run.
plot_epoch_wise_double_descent(epoch_wise_results)

final_test_f1 = epoch_wise_results['test_f1']
print(f"\nfinal F1 score: {final_test_f1:.4f}")


In [None]:
# Record the wall-clock time at which the final cell ran.
finished_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"all experiments completed at end time {finished_at}")