In [1]:
"""
This file contains the code to run the baseline experiments.

More specifically, we are investigating the performance of basic models on the CIFAR-10 and CIFAR-10H datasets. The tasks for these datasets are multi-class classification.

The basic models include:
    * ResNet-50
    * VGG-16
    * Logistic Regression
    * Random Forest
    * XGBoost
"""

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.optim as optim
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import os
from typing import Tuple

# Loading Data

In [2]:
# Load CIFAR-10H dataset and return a Dataset
def load_cifar10h() -> Dataset:
    cifar10h_probs_path = "../data/cifar-10h/cifar10h-probs.npy"
    if not os.path.exists(cifar10h_probs_path):
        raise FileNotFoundError(f"Soft labels not found at {cifar10h_probs_path}. Please ensure the CIFAR-10H data is downloaded.")

    cifar10h_probs = np.load(cifar10h_probs_path).astype(np.float32)
    cifar10_test = datasets.CIFAR10(
        root="../data/cifar-10", train=False, download=True, transform=transforms.ToTensor()
    )

    class CIFAR10H(Dataset):
        def __init__(self, cifar10_dataset: Dataset, soft_labels: np.ndarray):
            self.cifar10_dataset = cifar10_dataset
            self.soft_labels = soft_labels

        def __len__(self) -> int:
            return len(self.cifar10_dataset)

        def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
            image, _ = self.cifar10_dataset[idx]
            soft_label = torch.from_numpy(self.soft_labels[idx])
            return image.float(), soft_label

    cifar10h_dataset = CIFAR10H(cifar10_test, cifar10h_probs)
    return cifar10h_dataset

In [3]:
# Load CIFAR-10 dataset and return train, validation, and test DataLoaders
def load_cifar10() -> Tuple[Dataset, Dataset, Dataset]:
    transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.ConvertImageDtype(torch.float32),
        ]
    )
    full_dataset = datasets.CIFAR10(root="../data/cifar-10", train=True, download=True, transform=transform)

    # This dataset will be used for testing and validation.
    #   30% of the data will be used for validation, and 70% for testing.
    test_size = int(0.7 * len(full_dataset))
    val_size = len(full_dataset) - test_size
    test_dataset, val_dataset = torch.utils.data.random_split(
        full_dataset, [test_size, val_size], generator=torch.Generator().manual_seed(229)
    )

    return test_dataset, val_dataset

In [4]:
def get_ml_data(cifar10h_dataset, cifar10_test_dataset):
    # Prepare data for ML models
    X_cifar10h = np.array([img.numpy().flatten() for img, _ in cifar10h_dataset])
    y_cifar10h = np.array([np.argmax(label) for _, label in cifar10h_dataset])

    X_cifar10_test = np.array([img.numpy().flatten() for img, _ in cifar10_test_dataset])
    y_cifar10_test = np.array([label for _, label in cifar10_test_dataset])

    # Scale the data for ML models
    scaler = StandardScaler()
    X_cifar10h_scaled = scaler.fit_transform(X_cifar10h)
    X_cifar10_scaled_test = scaler.transform(X_cifar10_test)

    return X_cifar10h_scaled, y_cifar10h, X_cifar10_scaled_test, y_cifar10_test

In [5]:
cifar10h_dataset = load_cifar10h()
cifar10h_loader = DataLoader(cifar10h_dataset, batch_size=128, shuffle=True)
print(f"CIFAR-10H dataset loaded with {len(cifar10h_dataset)} samples")

cifar10_test_dataset, cifar10_val_dataset = load_cifar10()  # Changed variable name to reflect split
cifar10_test_loader = DataLoader(cifar10_test_dataset, batch_size=128, shuffle=False)
cifar10_val_loader = DataLoader(cifar10_val_dataset, batch_size=128, shuffle=False)
print(
    f"CIFAR-10 dataset loaded with {len(cifar10_test_dataset)} test and {len(cifar10_val_dataset)} validation samples"
)
X_cifar10h, y_cifar10h, X_cifar10_test, y_cifar10_test = get_ml_data(
    cifar10h_dataset, cifar10_test_dataset
)

Files already downloaded and verified
CIFAR-10H dataset loaded with 10000 samples
Files already downloaded and verified
CIFAR-10 dataset loaded with 35000 test and 15000 validation samples


# Training
Training is done on the CIFAR-10H dataset. Evaluation is done on the CIFAR-10 train set, which we use as a test set.

In [6]:
def train_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    criterion: nn.Module,
    optimizer: optim.Optimizer,
    num_epochs: int,
) -> nn.Module:
    device = torch.device(
        "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
    )
    print(f"Using device: {device}")
    model = model.to(device)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % 50 == 0:
                print(
                    f"  Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Train Loss: {running_loss/50:.4f}"
                )
                running_loss = 0.0

        # Validation phase
        model.eval()
        correct = 0
        total = 0
        val_loss = 0.0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)

                if len(labels.shape) > 1:  # For soft labels
                    _, predicted = torch.max(outputs.data, 1)
                    _, labels = torch.max(labels, 1)
                else:  # For hard labels
                    _, predicted = torch.max(outputs.data, 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                val_loss += criterion(outputs, labels).item()

        accuracy = 100 * correct / total
        val_loss = val_loss / len(val_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}] Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.2f}%")

    return model

## Training Neural Networks

In [7]:
def train_nn_model(
    model, cifar10h_loader: DataLoader, cifar10_val_loader: DataLoader, num_epochs: int = 20, lr: float = 0.001
) -> list:
    print(f"\nTraining {model.__class__.__name__} on CIFAR-10H...")

    # Adjust the final layer for CIFAR-10
    if isinstance(model, models.ResNet):
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, 10)
    elif isinstance(model, models.VGG):
        num_ftrs = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(num_ftrs, 10)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model = train_model(
        model=model,
        train_loader=cifar10h_loader,
        val_loader=cifar10_val_loader,
        criterion=criterion,
        optimizer=optimizer,
        num_epochs=num_epochs,
    )
    torch.save(model.state_dict(), f"models/{model.__class__.__name__}_cifar10h.pth")

def evaluate_nn_model(model, cifar10_test_loader):
    model.load_state_dict(
        torch.load(f"models/{model.__class__.__name__}_cifar10h.pth", weights_only=True)
    )
    model.eval()

    correct = 0
    total = 0
    device = next(model.parameters()).device
    with torch.no_grad():
        for images, labels in cifar10_test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"{model.__class__.__name__} Accuracy on CIFAR-10 test set: {accuracy:.2f}%")

In [8]:
resnet_model = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
train_nn_model(resnet_model, cifar10h_loader, cifar10_val_loader, lr=0.01)
evaluate_nn_model(resnet_model, cifar10_test_loader)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /Users/jackle/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:02<00:00, 29.6MB/s]



Training ResNet on CIFAR-10H...
Using device: mps
  Epoch [1/20], Step [50/79], Train Loss: 1.4200
Epoch [1/20] Validation Loss: 1.3637, Accuracy: 55.83%
  Epoch [2/20], Step [50/79], Train Loss: 0.8788
Epoch [2/20] Validation Loss: 0.9484, Accuracy: 68.19%
  Epoch [3/20], Step [50/79], Train Loss: 0.6900
Epoch [3/20] Validation Loss: 0.9948, Accuracy: 67.32%
  Epoch [4/20], Step [50/79], Train Loss: 0.5706
Epoch [4/20] Validation Loss: 0.8675, Accuracy: 71.50%
  Epoch [5/20], Step [50/79], Train Loss: 0.4484
Epoch [5/20] Validation Loss: 0.9914, Accuracy: 69.73%
  Epoch [6/20], Step [50/79], Train Loss: 0.4166
Epoch [6/20] Validation Loss: 0.8313, Accuracy: 73.81%
  Epoch [7/20], Step [50/79], Train Loss: 0.4293
Epoch [7/20] Validation Loss: 0.9724, Accuracy: 69.63%
  Epoch [8/20], Step [50/79], Train Loss: 0.3694
Epoch [8/20] Validation Loss: 0.8233, Accuracy: 74.32%
  Epoch [9/20], Step [50/79], Train Loss: 0.3564
Epoch [9/20] Validation Loss: 0.8398, Accuracy: 73.51%
  Epoch [10/2

In [9]:
vgg_model = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
train_nn_model(vgg_model, cifar10h_loader, cifar10_val_loader, lr=0.001)
evaluate_nn_model(vgg_model, cifar10_test_loader)


Training VGG on CIFAR-10H...
Using device: mps
  Epoch [1/20], Step [50/79], Train Loss: 2.4263
Epoch [1/20] Validation Loss: 2.2472, Accuracy: 12.31%
  Epoch [2/20], Step [50/79], Train Loss: 2.2072
Epoch [2/20] Validation Loss: 2.0230, Accuracy: 17.27%
  Epoch [3/20], Step [50/79], Train Loss: 1.9179
Epoch [3/20] Validation Loss: 1.7945, Accuracy: 27.06%
  Epoch [4/20], Step [50/79], Train Loss: 1.8446
Epoch [4/20] Validation Loss: 1.8051, Accuracy: 26.39%
  Epoch [5/20], Step [50/79], Train Loss: 1.7638
Epoch [5/20] Validation Loss: 1.6950, Accuracy: 32.66%
  Epoch [6/20], Step [50/79], Train Loss: 1.6645
Epoch [6/20] Validation Loss: 1.5061, Accuracy: 38.39%
  Epoch [7/20], Step [50/79], Train Loss: 1.5940
Epoch [7/20] Validation Loss: 1.4741, Accuracy: 40.19%
  Epoch [8/20], Step [50/79], Train Loss: 1.4450
Epoch [8/20] Validation Loss: 1.5057, Accuracy: 44.57%
  Epoch [9/20], Step [50/79], Train Loss: 1.4280
Epoch [9/20] Validation Loss: 1.3142, Accuracy: 48.93%
  Epoch [10/20],

## Training Machine Learning Models

In [6]:
def train_ml_models(model, X_cifar10h_scaled, y_cifar10h):
    # Machine Learning models

    print(f"\nTraining {model.__class__.__name__} on CIFAR-10H...")
    model.fit(X_cifar10h_scaled, y_cifar10h)  # Use scaled data

def evaluate_ml_models(model, X_cifar10_scaled, y_cifar10):
    y_pred = model.predict(X_cifar10_scaled)  # Use scaled data
    accuracy = accuracy_score(y_cifar10, y_pred)
    accuracy = 100 * accuracy
    print(f"{model.__class__.__name__} Accuracy on CIFAR-10 test set: {accuracy:.2f}%")

In [None]:
logistic_model = LogisticRegression(max_iter=3000, n_jobs=-1)
train_ml_models(logistic_model, X_cifar10h, y_cifar10h)
evaluate_ml_models(logistic_model, X_cifar10_test, y_cifar10_test)

In [None]:
random_forest_model = RandomForestClassifier(n_jobs=-1)
train_ml_models(random_forest_model, X_cifar10h, y_cifar10h)
evaluate_ml_models(random_forest_model, X_cifar10_test, y_cifar10_test)

In [None]:
xgb_model = XGBClassifier(n_jobs=-1)
train_ml_models(xgb_model, X_cifar10h, y_cifar10h)
evaluate_ml_models(xgb_model, X_cifar10_test, y_cifar10_test)