In [1]:
"""
This file contains the code to run the baseline experiments.

More specifically, we are investigating the performance of basic models on the CIFAR-10 and CIFAR-10H datasets. The tasks for these datasets are multi-class classification.

The basic models include:
    * ResNet-50
    * VGG-16
    * Logistic Regression
    * Random Forest
    * XGBoost
"""

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.optim as optim
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import os
from typing import Tuple

# Loading Data

In [2]:
# Load CIFAR-10 dataset and return train, validation, and test DataLoaders
def load_cifar10() -> Tuple[Dataset, Dataset, Dataset]:
    transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.ConvertImageDtype(torch.float32),
        ]
    )
    full_dataset = datasets.CIFAR10(root="../data/cifar-10", train=True, download=True, transform=transform)
    # we use the test dataset for training, similar to the CIFAR-10H experiment
    train_dataset = datasets.CIFAR10(root="../data/cifar-10", train=False, download=True, transform=transform)

    # This dataset will be used for testing and validation.
    #   30% of the data will be used for validation, and 70% for testing.
    test_size = int(0.7 * len(full_dataset))
    val_size = len(full_dataset) - test_size
    test_dataset, val_dataset = torch.utils.data.random_split(
        full_dataset, [test_size, val_size], generator=torch.Generator().manual_seed(229)
    )

    return train_dataset, test_dataset, val_dataset

In [3]:
def get_ml_data(cifar10_train_dataset, cifar10_test_dataset):
    # Prepare data for ML models
    X_cifar10 = np.array([img.numpy().flatten() for img, _ in cifar10_train_dataset])
    y_cifar10 = np.array([label for _, label in cifar10_train_dataset])

    X_cifar10_test = np.array([img.numpy().flatten() for img, _ in cifar10_test_dataset])
    y_cifar10_test = np.array([label for _, label in cifar10_test_dataset])

    # Scale the data for ML models
    scaler = StandardScaler()
    X_cifar10_scaled = scaler.fit_transform(X_cifar10)
    X_cifar10_scaled_test = scaler.transform(X_cifar10_test)

    return X_cifar10_scaled, y_cifar10, X_cifar10_scaled_test, y_cifar10_test

In [4]:
cifar10_train_dataset, cifar10_test_dataset, cifar10_val_dataset = load_cifar10() 
cifar10_train_loader = DataLoader(cifar10_train_dataset, batch_size=128, shuffle=True)
cifar10_test_loader = DataLoader(cifar10_test_dataset, batch_size=128, shuffle=False)
cifar10_val_loader = DataLoader(cifar10_val_dataset, batch_size=128, shuffle=False)
print(
    f"CIFAR-10 dataset loaded with {len(cifar10_train_dataset)} training, {len(cifar10_test_dataset)} test, and {len(cifar10_val_dataset)} validation samples"
)
X_cifar10, y_cifar10, X_cifar10_test, y_cifar10_test = get_ml_data(cifar10_train_dataset, cifar10_test_dataset)

Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 dataset loaded with 10000 training, 35000 test, and 15000 validation samples


# Training
Training is done on the CIFAR-10 test set. Evaluation is done on the CIFAR-10 train set, which we use as a test set.

In [5]:
def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    criterion: nn.Module,
    optimizer: optim.Optimizer,
    num_epochs: int,
) -> nn.Module:
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
    )
    print(f"Using device: {device}")
    model = model.to(device)

    best_val_acc = 0.0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Validation phase
        model.eval()
        correct = 0
        total = 0
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)

                if len(labels.shape) > 1:  # For soft labels
                    _, predicted = torch.max(outputs.data, 1)
                    _, labels = torch.max(labels, 1)
                else:  # For hard labels
                    _, predicted = torch.max(outputs.data, 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                val_loss += criterion(outputs, labels).item()

        accuracy = 100 * correct / total
        val_loss = val_loss / len(val_loader)
        print(
            f"Epoch [{epoch+1}/{num_epochs}] Train Loss: {running_loss/len(train_loader):.4f}, Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.2f}%"
        )

        # Save model if validation accuracy improves
        if accuracy > best_val_acc:
            best_val_acc = accuracy
            torch.save(model.state_dict(), f"models/{model.__class__.__name__}_cifar10.pth")
            print(f"Saved model with improved validation accuracy: {accuracy:.2f}%")

    return model

## Training Neural Networks

In [6]:
def train_nn_model(
    model, cifar10_train_loader: DataLoader, cifar10_val_loader: DataLoader, num_epochs: int = 20, lr: float = 0.001
) -> list:
    print(f"\nTraining {model.__class__.__name__} on CIFAR-10...")

    # Adjust the final layer for CIFAR-10
    if isinstance(model, models.ResNet):
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 10)
    elif isinstance(model, models.VGG):
        num_ftrs = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(num_ftrs, 10)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model = train_model(
        model=model,
        train_loader=cifar10_train_loader,
        val_loader=cifar10_val_loader,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=num_epochs,
    )

def evaluate_nn_model(model, cifar10_test_loader):
    model.load_state_dict(
        torch.load(f"models/{model.__class__.__name__}_cifar10.pth", weights_only=True)
    )
    model.eval()

    correct = 0
    total = 0
    device = next(model.parameters()).device
    with torch.no_grad():
        for images, labels in cifar10_test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"{model.__class__.__name__} Accuracy on CIFAR-10 test set: {accuracy:.2f}%")

In [7]:
resnet_model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
train_nn_model(resnet_model, cifar10_train_loader, cifar10_val_loader, lr=0.01)
evaluate_nn_model(resnet_model, cifar10_test_loader)


Training ResNet on CIFAR-10...
Using device: mps
Epoch [1/20] Train Loss: 1.3078, Validation Loss: 1.3234, Accuracy: 53.88%
Saved model with improved validation accuracy: 53.88%
Epoch [2/20] Train Loss: 0.8497, Validation Loss: 1.0143, Accuracy: 66.02%
Saved model with improved validation accuracy: 66.02%
Epoch [3/20] Train Loss: 0.6114, Validation Loss: 0.8822, Accuracy: 71.39%
Saved model with improved validation accuracy: 71.39%
Epoch [4/20] Train Loss: 0.4668, Validation Loss: 0.9592, Accuracy: 70.69%
Epoch [5/20] Train Loss: 0.3578, Validation Loss: 1.1160, Accuracy: 68.58%
Epoch [6/20] Train Loss: 0.2868, Validation Loss: 1.4007, Accuracy: 65.35%
Epoch [7/20] Train Loss: 0.2403, Validation Loss: 1.2300, Accuracy: 67.98%
Epoch [8/20] Train Loss: 0.2920, Validation Loss: 1.2564, Accuracy: 67.11%
Epoch [9/20] Train Loss: 0.2421, Validation Loss: 1.0697, Accuracy: 71.74%
Saved model with improved validation accuracy: 71.74%
Epoch [10/20] Train Loss: 0.1905, Validation Loss: 1.2155, 

In [8]:
vgg_model = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
train_nn_model(vgg_model, cifar10_train_loader, cifar10_val_loader, lr=0.01)
evaluate_nn_model(vgg_model, cifar10_test_loader)


Training VGG on CIFAR-10...
Using device: mps
Epoch [1/20] Train Loss: 2.3457, Validation Loss: 2.3116, Accuracy: 9.88%
Saved model with improved validation accuracy: 9.88%
Epoch [2/20] Train Loss: 2.3088, Validation Loss: 2.3043, Accuracy: 9.88%
Epoch [3/20] Train Loss: 2.3057, Validation Loss: 2.3064, Accuracy: 10.25%
Saved model with improved validation accuracy: 10.25%
Epoch [4/20] Train Loss: 2.3073, Validation Loss: 2.3053, Accuracy: 10.07%
Epoch [5/20] Train Loss: 2.3053, Validation Loss: 2.3074, Accuracy: 9.83%
Epoch [6/20] Train Loss: 2.3052, Validation Loss: 2.3047, Accuracy: 9.93%
Epoch [7/20] Train Loss: 2.3058, Validation Loss: 2.3040, Accuracy: 9.93%
Epoch [8/20] Train Loss: 2.3045, Validation Loss: 2.3040, Accuracy: 9.65%
Epoch [9/20] Train Loss: 2.3047, Validation Loss: 2.3031, Accuracy: 9.93%
Epoch [10/20] Train Loss: 2.3050, Validation Loss: 2.3028, Accuracy: 10.15%
Epoch [11/20] Train Loss: 2.3038, Validation Loss: 2.3027, Accuracy: 10.15%
Epoch [12/20] Train Loss: 

## Training Machine Learning Models

In [9]:
def train_ml_models(model, X_cifar10_scaled, y_cifar10):
    # Machine Learning models

    print(f"\nTraining {model.__class__.__name__} on CIFAR-10...")
    model.fit(X_cifar10_scaled, y_cifar10)  # Use scaled data


def evaluate_ml_models(model, X_cifar10_scaled, y_cifar10):
    y_pred = model.predict(X_cifar10_scaled)  # Use scaled data
    accuracy = accuracy_score(y_cifar10, y_pred)
    accuracy = 100 * accuracy
    print(f"{model.__class__.__name__} Accuracy on CIFAR-10 test set: {accuracy:.2f}%")

In [10]:
logistic_model = LogisticRegression(max_iter=3000, n_jobs=-1)
train_ml_models(logistic_model, X_cifar10, y_cifar10)
evaluate_ml_models(logistic_model, X_cifar10_test, y_cifar10_test)


Training LogisticRegression on CIFAR-10...
LogisticRegression Accuracy on CIFAR-10 test set: 28.01%


In [11]:
random_forest_model = RandomForestClassifier(n_jobs=-1)
train_ml_models(random_forest_model, X_cifar10, y_cifar10)
evaluate_ml_models(random_forest_model, X_cifar10_test, y_cifar10_test)


Training RandomForestClassifier on CIFAR-10...
RandomForestClassifier Accuracy on CIFAR-10 test set: 41.64%


In [12]:
xgb_model = XGBClassifier(n_jobs=-1)
train_ml_models(xgb_model, X_cifar10, y_cifar10)
evaluate_ml_models(xgb_model, X_cifar10_test, y_cifar10_test)



Training XGBClassifier on CIFAR-10...
XGBClassifier Accuracy on CIFAR-10 test set: 47.10%
