In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# Authenticates this session with Kaggle; the competition download in the
# following cell is issued through the same `kagglehub` module.
import kagglehub
kagglehub.login()


VBox(children=(HTML(value='<center> <img\nsrc=https://www.kaggle.com/static/images/site-logo.png\nalt=\'Kaggle…

Kaggle credentials set.
Kaggle credentials successfully validated.


In [None]:
# Download the competition files through kagglehub and remember where they
# were placed. This notebook environment differs from Kaggle's own Python
# image, so libraries the notebook relies on may be missing here.
# The cell can be removed once the data is available by other means.

COMPETITION_SLUG = 'deep-learning-spring-2025-project-1'
deep_learning_spring_2025_project_1_path = kagglehub.competition_download(COMPETITION_SLUG)

print('Data source import complete.')


Downloading from https://www.kaggle.com/api/v1/competitions/data/download-all/deep-learning-spring-2025-project-1...


100%|██████████| 189M/189M [00:00<00:00, 217MB/s]

Extracting files...





Data source import complete.


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory ...
input_file_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input/')
    for filename in filenames
)
# ... and print one full path per line (nothing is printed for empty folders).
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Custom ResNet for CIFAR-10 on CUDA

This notebook implements a custom ResNet model for CIFAR-10 using PyTorch. It includes two major sections:

1. **Training Section:** Loads the CIFAR-10 training and test sets via torchvision, trains the model, and saves a checkpoint.

2. **Inference Section:** Loads a test batch from a pickle file (with no labels), runs inference using the trained model (if available), and saves the predictions to a CSV file.

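The loading cell reads the unlabeled test pickle from `/content/cifar_test_nolabel.pkl`. On Kaggle the same file is available at `/kaggle/input/deep-learning-spring-2025-project-1/cifar_test_nolabel.pkl`; update the path in the loading cell if your copy lives elsewhere.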

In [None]:
import os
import pickle
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as transforms
import pandas as pd

from PIL import Image

In [None]:
# Model Definitions

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions and a skip connection.

    Main path: conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm. The result
    is added to the shortcut branch and passed through a final ReLU.

    Args:
        in_channels (int): Number of channels of the incoming feature map.
        out_channels (int): Number of channels produced by both convolutions.
        stride (int): Stride of the first convolution and, when a projection
            is required, of the shortcut convolution. Defaults to 1.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # A plain identity skip is only possible when the main path keeps both
        # the spatial size and the channel count; otherwise the input is
        # projected with a strided 1x1 convolution followed by BatchNorm.
        shape_preserved = stride == 1 and in_channels == out_channels
        if shape_preserved:
            self.shortcut = nn.Identity()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        main = self.conv1(x)
        main = self.bn1(main)
        main = self.relu(main)
        main = self.conv2(main)
        main = self.bn2(main)
        # In-place add of the (possibly projected) input onto the main path.
        main += self.shortcut(x)
        return self.relu(main)


class CustomResNet(nn.Module):
    """ResNet-style image classifier assembled from four stages of ``BasicBlock``.

    A 3x3 stem convolution is followed by four residual stages whose widths are
    ``base_channels`` times 1, 2, 4 and 8; stages 2-4 start with a stride-2
    block. Global average pooling and a linear layer produce the class logits.
    """

    def __init__(self, blocks_per_stage=(3, 4, 4, 3), base_channels=32, num_classes=10):
        """
        Args:
            blocks_per_stage (sequence of int): Four integers specifying the number
                of residual blocks per stage. Each must be at least 1.
            base_channels (int): Number of channels for the initial convolution.
            num_classes (int): Number of output classes.

        Raises:
            ValueError: If ``blocks_per_stage`` does not contain exactly four
                entries, or if any entry is smaller than 1.
        """
        super(CustomResNet, self).__init__()

        # Validate up front: a wrong length would otherwise surface as an opaque
        # IndexError (too short) or be silently ignored (too long), and a count
        # of 0 would still build one block because _make_layer always adds one.
        blocks_per_stage = tuple(blocks_per_stage)
        if len(blocks_per_stage) != 4:
            raise ValueError(
                f"blocks_per_stage must contain exactly 4 entries, got {len(blocks_per_stage)}"
            )
        if any(count < 1 for count in blocks_per_stage):
            raise ValueError(
                f"every stage needs at least one block, got {blocks_per_stage}"
            )

        # Tracks the channel count feeding the next block while stages are built.
        self.in_channels = base_channels
        # Initial convolution for CIFAR-10 (3-channel, 32x32 images)
        self.conv1 = nn.Conv2d(3, base_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(base_channels)
        self.relu = nn.ReLU(inplace=True)

        self.layer1 = self._make_layer(base_channels, blocks_per_stage[0], stride=1)
        self.layer2 = self._make_layer(base_channels * 2, blocks_per_stage[1], stride=2)
        self.layer3 = self._make_layer(base_channels * 4, blocks_per_stage[2], stride=2)
        self.layer4 = self._make_layer(base_channels * 8, blocks_per_stage[3], stride=2)

        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(base_channels * 8, num_classes)

    def _make_layer(self, out_channels, num_blocks, stride):
        """Build one stage of ``num_blocks`` residual blocks.

        Only the first block applies ``stride`` and the channel change from
        ``self.in_channels`` to ``out_channels``; the remaining blocks use
        stride 1. ``self.in_channels`` is updated to ``out_channels``.
        """
        layers = []
        layers.append(BasicBlock(self.in_channels, out_channels, stride))
        self.in_channels = out_channels
        for _ in range(1, num_blocks):
            layers.append(BasicBlock(out_channels, out_channels, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pool(out)
        # Collapse the pooled 1x1 feature maps into one feature vector per sample.
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out

def count_parameters(model):
    """Return the total number of scalar entries in the trainable parameters of ``model``.

    Parameters whose ``requires_grad`` flag is off are not counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total


In [None]:
# Data Loading Function for CIFAR-10 using torchvision
def get_data(batch_size=128):
    """Create the CIFAR-10 training and test data loaders.

    The datasets are downloaded into ``./data`` when they are not present.
    Training images are randomly cropped (32x32 with 4 pixels of padding) and
    randomly flipped horizontally; both splits are converted to tensors and
    normalized with the same per-channel mean and standard deviation.

    Args:
        batch_size (int): Batch size used by both loaders.

    Returns:
        tuple: ``(train_loader, test_loader)``. Only the training loader shuffles.
    """
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

    train_pipeline = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    eval_pipeline = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    loaders = []
    # Training split first, then the test split; shuffling follows the split flag.
    for is_train, pipeline in ((True, train_pipeline), (False, eval_pipeline)):
        dataset = torchvision.datasets.CIFAR10(
            root='./data', train=is_train, download=True, transform=pipeline
        )
        loaders.append(
            DataLoader(dataset, batch_size=batch_size, shuffle=is_train, num_workers=2)
        )
    return tuple(loaders)


In [None]:
# Training and Testing Functions
def train(model, train_loader, optimizer, criterion, device, epoch, print_interval=50):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Module to optimize; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer that updates the parameters of ``model``.
        criterion: Callable computing the loss from ``(outputs, targets)``.
        device: Device the batches are moved to before the forward pass.
        epoch: Epoch number, used only in the progress messages.
        print_interval (int): Print the mean loss of the last ``print_interval``
            iterations every ``print_interval`` iterations. ``0``, a negative
            value or ``None`` disables progress output.

    Returns:
        float: Mean of the per-batch loss values over the whole epoch, or
        ``0.0`` when the loader yields no batches.
    """
    model.train()
    running_loss = 0.0   # accumulates since the last progress message
    epoch_loss = 0.0     # accumulates over the whole epoch
    num_batches = 0
    # Guard once so a zero interval cannot reach the modulo below.
    log_progress = bool(print_interval) and print_interval > 0
    for i, (inputs, targets) in enumerate(train_loader, 1):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        num_batches += 1
        if log_progress and i % print_interval == 0:
            print(f"Epoch: {epoch} | Iteration: {i} | Loss: {running_loss / print_interval:.3f}")
            running_loss = 0.0
    return epoch_loss / num_batches if num_batches else 0.0

def test(model, test_loader, device, epoch):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    accuracy = correct / total
    print(f"Epoch {epoch}: Test accuracy: {accuracy:.3f}")
    return accuracy


In [None]:
# Seed the random number generators before any weights are created so that
# initialization, shuffling and augmentation start from the same state on
# every "Restart & Run All". (GPU kernels may still introduce small
# run-to-run differences.) NumPy is seeded too in case later cells draw from it.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Set device: Use Apple MPS if available, otherwise CUDA, otherwise CPU.
# `torch.backends.mps` is looked up defensively because not every PyTorch
# build exposes that attribute.
mps_backend = getattr(torch.backends, "mps", None)
if mps_backend is not None and mps_backend.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# Instantiate the model and print the parameter count
model = CustomResNet(blocks_per_stage=[3, 4, 4, 3], base_channels=32, num_classes=10).to(device)
print("Total parameters:", count_parameters(model))


Using device: cuda
Total parameters: 4735658


In [None]:
# Training Mode
# Trains the model on CIFAR-10, evaluating on the CIFAR-10 test split after
# every epoch, and writes the final weights to a checkpoint file.

epochs = 60  # Adjust epochs as needed
checkpoint_path = "model_checkpoint.pth"

train_loader, test_loader = get_data(batch_size=128)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(1, epochs + 1):
    train(
        model=model,
        train_loader=train_loader,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
        epoch=epoch,
    )
    test(model=model, test_loader=test_loader, device=device, epoch=epoch)

# Persist only the parameters (state dict) of the trained model
torch.save(model.state_dict(), checkpoint_path)
print(f"Model checkpoint saved to {checkpoint_path}")


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:03<00:00, 43.8MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch: 1 | Iteration: 50 | Loss: 1.971
Epoch: 1 | Iteration: 100 | Loss: 1.688
Epoch: 1 | Iteration: 150 | Loss: 1.566
Epoch: 1 | Iteration: 200 | Loss: 1.478
Epoch: 1 | Iteration: 250 | Loss: 1.395
Epoch: 1 | Iteration: 300 | Loss: 1.321
Epoch: 1 | Iteration: 350 | Loss: 1.245
Epoch 1: Test accuracy: 0.499
Epoch: 2 | Iteration: 50 | Loss: 1.160
Epoch: 2 | Iteration: 100 | Loss: 1.116
Epoch: 2 | Iteration: 150 | Loss: 1.076
Epoch: 2 | Iteration: 200 | Loss: 1.036
Epoch: 2 | Iteration: 250 | Loss: 0.993
Epoch: 2 | Iteration: 300 | Loss: 0.939
Epoch: 2 | Iteration: 350 | Loss: 0.962
Epoch 2: Test accuracy: 0.680
Epoch: 3 | Iteration: 50 | Loss: 0.861
Epoch: 3 | Iteration: 100 | Loss: 0.860
Epoch: 3 | Iteration: 150 | Loss: 0.787
Epoch: 3 | Iteration: 200 | Loss: 0.810
Epoch: 3 | Iteration: 250 | Loss: 0.776
Epoch: 3 | Iteration: 300 | Loss: 0.737
Epoch: 3 | Iteration: 350 | Loss: 0.752
Epoch 3: Test 

In [None]:
# Template code for reading the test file
def load_cifar_batch(file):
    """Unpickle the CIFAR batch stored at ``file`` and return the loaded object.

    The file is read in binary mode and decoded with ``encoding='bytes'``.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only use this on
    files from a trusted source.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Locate the test batch. The original hard-coded path is tried first; the
# Kaggle input directory and the kagglehub download directory are fallbacks
# so the cell also works when the file was not copied to /content by hand.
TEST_PICKLE_NAME = 'cifar_test_nolabel.pkl'
test_pickle_candidates = [
    '/content/cifar_test_nolabel.pkl',
    '/kaggle/input/deep-learning-spring-2025-project-1/cifar_test_nolabel.pkl',
]
# The kagglehub download cell may have been skipped or deleted, so look its
# variable up defensively instead of referencing it directly.
# NOTE(review): assumes the pickle sits at the top level of the download
# directory - confirm against the extracted archive.
kagglehub_dir = globals().get('deep_learning_spring_2025_project_1_path')
if kagglehub_dir:
    test_pickle_candidates.append(os.path.join(kagglehub_dir, TEST_PICKLE_NAME))

test_pickle_path = next(
    (candidate for candidate in test_pickle_candidates if os.path.exists(candidate)),
    None,
)
if test_pickle_path is None:
    raise FileNotFoundError(
        "Could not find cifar_test_nolabel.pkl; looked in: "
        + ", ".join(test_pickle_candidates)
    )
cifar10_batch = load_cifar_batch(test_pickle_path)

# Extract images; the test data is in (N x W x H x C) format
images = cifar10_batch[b'data']
print(f"Loaded test batch with {images.shape[0]} images")


Loaded test batch with 10000 images


## Build the Test Dataset

In [None]:
# Create a dataset from the images array
class TestDatasetFromArray(Dataset):
    """Dataset serving unlabeled images from an in-memory, indexable collection.

    Args:
        images: Indexable collection of images supporting ``len()``.
            NOTE(review): the default transform presumably expects H x W x C
            arrays - confirm against the loaded test batch.
        transform: Optional callable applied to every image. When omitted, a
            default pipeline converts the array to a PIL image, then to a
            tensor, and normalizes it per channel.
    """

    def __init__(self, images, transform=None):
        self.images = images
        # Default pipeline: numpy array -> PIL Image -> tensor -> normalize
        self.transform = transform if transform is not None else transforms.Compose([
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
        ])

    def __len__(self):
        """Number of images in the collection."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the (transformed) image stored at position ``idx``."""
        sample = self.images[idx]
        # NumPy samples are cast to 8-bit unsigned integers before transforming.
        if isinstance(sample, np.ndarray):
            sample = sample.astype('uint8')
        return self.transform(sample) if self.transform else sample

# Wrap the unlabeled images in a dataset and serve them in their stored order.
# NOTE: this rebinds `test_loader`, which the training cell used for the
# labeled CIFAR-10 test split.
test_dataset = TestDatasetFromArray(images, transform=None)
test_loader = DataLoader(dataset=test_dataset, batch_size=128, shuffle=False)


In [None]:
# Inference Mode
# This cell loads a saved model checkpoint (if available) and runs inference on the test dataset.
# Predictions are then saved to a CSV file.

checkpoint_path = "model_checkpoint.pth"
if os.path.exists(checkpoint_path):
    # weights_only=True limits unpickling to tensors and plain containers,
    # which is all a state dict needs; it also avoids the FutureWarning that
    # torch.load emits when the argument is left unset.
    state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    print("Loaded model checkpoint from model_checkpoint.pth")
else:
    print("No checkpoint found. Running inference with untrained model.")

model.eval()
predictions = []
with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        outputs = model(batch)
        # Predicted class = index of the largest score for each image.
        _, preds = torch.max(outputs, 1)
        predictions.extend(preds.cpu().tolist())

# Save predictions to CSV using pandas; Id is the position of the image in the test batch
df = pd.DataFrame({"Id": range(len(predictions)), "Prediction": predictions})
df.to_csv("cifar_test_predictions.csv", index=False)
print("Predictions saved to cifar_test_predictions.csv")


  model.load_state_dict(torch.load("model_checkpoint.pth", map_location=device))


Loaded model checkpoint from model_checkpoint.pth
Predictions saved to cifar_test_predictions.csv


## Instructions

1. **Training Section:** Run the training cell to train the model on CIFAR-10. The model checkpoint will be saved as `model_checkpoint.pth`.
2. **Inference Section:** Ensure the test pickle file is available at the path used by the loading cell (`/content/cifar_test_nolabel.pkl` as written; on Kaggle the file is at `/kaggle/input/deep-learning-spring-2025-project-1/cifar_test_nolabel.pkl`). Then run the inference cell to generate predictions, which will be saved to `cifar_test_predictions.csv`.

Feel free to adjust hyperparameters (like epochs) as needed.