In [1]:
import torch
import torchvision
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.utils.data import random_split
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import time

In [2]:
torch.random.manual_seed(0)

<torch._C.Generator at 0x7fbf6cc866d0>

In [3]:

project_name='cifar10-resnetV2-20'

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# Hyper-parameters
num_epochs = 80
learning_rate = 0.001


cuda


# Data

In [4]:
def get_datasets(batch_size=32):
    # Transforms
    augment_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.Pad((4, 4)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop((32, 32))
    ])

    evaluate_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_size = 45_000
    val_size = 5_000

    train_data = torchvision.datasets.CIFAR10(
        root="./data",
        train=True,
        transform=augment_pipeline,
        download=True
    )
    test_ds = torchvision.datasets.CIFAR10(
        root="./data",
        train=False,
        transform=evaluate_pipeline,
        download=True
    )
    
    train_ds, val_ds = torch.utils.data.random_split(train_data, [train_size, val_size])
    val_ds.transform = evaluate_pipeline
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size)
    test_loader = DataLoader(test_ds, batch_size=batch_size)

    return train_loader, val_loader, test_loader

# Model

In [5]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, channels, stride=1):
        super(ResidualBlock, self).__init__()
        conv_kwargs = {
            "kernel_size": (3, 3),
            "padding": 1,  # To ensure 3x3 conv does not reduce image size. padding=1 also works
            "bias": False
        }
        self.stride = stride
        self.in_channels = in_channels
        self.channels = channels
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.relu = nn.ReLU()
        # This conv is in_channels -> channels and applies stride
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=channels, stride=stride, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(channels)
        # This conv is channels -> channels
        self.conv2 = nn.Conv2d(in_channels=channels, out_channels=channels, **conv_kwargs)
    
    def strided_identity(self, x):
        # Downsample with 'nearest' method (this is striding if dims are divisible by stride)
        # Equivalently x = x[:, :, ::stride, ::stride].contiguous()
        if self.stride != 1:
            x = nn.functional.interpolate(x, mode='nearest', scale_factor=(1/self.stride))
        # Create padding tensor for extra channels
        if self.channels != self.in_channels:
            (b, c, h, w) = x.shape
            num_pad_channels = self.channels - self.in_channels
            pad = torch.zeros((b, num_pad_channels, h, w), device=x.device)
            # Append padding to the downsampled identity
            x = torch.cat((x, pad), dim=1)
        return x

    def forward(self, x):
        identity = self.strided_identity(x)
        z = self.bn1(x)
        z = self.relu(z)
        z = self.conv1(z)
        z = self.bn2(z)
        z = self.relu(z)
        z = self.conv2(z)
        out = identity + z
        return out
      

In [6]:
class ResNetV2(nn.Module):
  def __init__(self):
    super(ResNetV2, self).__init__()

    self.input_layer = nn.Conv2d(3, 16, (3,3), padding=1)

    self.layer_1 = ResidualBlock(16,16)
    self.layer_2 = ResidualBlock(16,16)
    self.layer_3 = ResidualBlock(16,16)

    self.layer_4 = ResidualBlock(16,32, stride=2)
    self.layer_5 = ResidualBlock(32,32)
    self.layer_6 = ResidualBlock(32,32)

    self.layer_7 = ResidualBlock(32,64, stride=2)
    self.layer_8 = ResidualBlock(64,64)
    self.layer_9 = ResidualBlock(64,64)

    self.pool = nn.AdaptiveAvgPool2d((1,1))
    self.output_layer = nn.Linear(64,10)


  def forward(self, x):
    out = self.input_layer(x)
    out = self.layer_1(out)
    out = self.layer_2(out)
    out = self.layer_3(out)
    out = self.layer_4(out)
    out = self.layer_5(out)
    out = self.layer_6(out)
    out = self.layer_7(out)
    out = self.layer_8(out)
    out = self.layer_9(out)
    out = self.pool(out)
    out = out.flatten(1)
    out = out.view(out.size(0), -1)
    out = self.output_layer(out)
    return out

model = ResNetV2().to(device)


In [7]:
inputs = torch.randn((4, 3, 32, 32)).to(device)
outputs = model(inputs)
outputs.shape

torch.Size([4, 10])

# Training

In [8]:
def train_epoch(model, criterion, optimizer, train_loader, epoch):
    metrics = {}
    running_loss = 0
    running_acc = 0
    
    start_epoch = time.time()
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        inputs = inputs.to(device)
        labels = labels.to(device)

        logits = model(inputs)
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        preds = logits.detach().cpu().numpy().argmax(axis=-1)
        running_acc += accuracy_score(labels.cpu(), preds)
    
    end_epoch = time.time()
    epoch_time = end_epoch - start_epoch
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = running_acc / len(train_loader)

    metrics["epoch_train_time"] = end_epoch - start_epoch
    metrics["loss"] = epoch_loss
    metrics["accuracy"] = epoch_acc
    
    print(f"Epoch {epoch + 1}: train loss = {epoch_loss:.4f}, train accuracy = {epoch_acc * 100:.2f}%, epoch time = {epoch_time:.2f} s")
    return metrics

In [9]:
def eval_epoch(model, criterion, val_loader, epoch):
    metrics = {}
    running_loss = 0
    all_preds = []
    all_labels = []
    
    start_epoch = time.time()
    model.eval()
    for inputs, labels in val_loader:
        with torch.no_grad():
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)
            loss = criterion(logits, labels)

            running_loss += loss.item()
            preds = logits.detach().cpu().numpy().argmax(axis=-1)

            all_preds.extend(preds.tolist())
            all_labels.extend(labels.tolist())
    end_epoch = time.time()
    epoch_time = end_epoch - start_epoch
    epoch_loss = running_loss / len(val_loader)
    epoch_acc = accuracy_score(all_labels, all_preds)

    metrics["average_batch_inference_time"] = epoch_time / len(val_loader)
    metrics["loss"] = epoch_loss
    metrics["accuracy"] = epoch_acc
    
    print(f"Epoch {epoch + 1}: val loss = {epoch_loss:.4f}, val accuracy = {epoch_acc * 100:.2f}%, val time = {epoch_time:.2f} s")
    return metrics

In [10]:
model = ResNetV2().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
train_loader, val_loader, test_loader = get_datasets(batch_size=128)

Files already downloaded and verified
Files already downloaded and verified


In [11]:
start_train = time.time()
train_epoch_times = []
eval_batch_times = []

for epoch in range(10):
    train_metrics = train_epoch(model, loss_fn, optimizer, train_loader, epoch)
    val_metrics = eval_epoch(model, loss_fn, val_loader, epoch)

    train_epoch_times.append(train_metrics["epoch_train_time"])
    eval_batch_times.append(val_metrics["average_batch_inference_time"])
end_train = time.time()

test_metrics = eval_epoch(model, loss_fn, test_loader, 0)

Epoch 1: train loss = 1.4881, train accuracy = 44.99%, epoch time = 33.06 s
Epoch 1: val loss = 1.3275, val accuracy = 52.08%, val time = 2.65 s
Epoch 2: train loss = 1.0873, train accuracy = 60.91%, epoch time = 32.37 s
Epoch 2: val loss = 0.9774, val accuracy = 65.02%, val time = 2.63 s
Epoch 3: train loss = 0.9290, train accuracy = 66.98%, epoch time = 33.03 s
Epoch 3: val loss = 1.0778, val accuracy = 63.18%, val time = 2.77 s
Epoch 4: train loss = 0.8139, train accuracy = 71.29%, epoch time = 33.06 s
Epoch 4: val loss = 1.0614, val accuracy = 65.14%, val time = 2.60 s
Epoch 5: train loss = 0.7253, train accuracy = 74.68%, epoch time = 32.55 s
Epoch 5: val loss = 0.8284, val accuracy = 72.24%, val time = 2.66 s
Epoch 6: train loss = 0.6674, train accuracy = 76.87%, epoch time = 33.31 s
Epoch 6: val loss = 0.8216, val accuracy = 71.10%, val time = 2.72 s
Epoch 7: train loss = 0.6278, train accuracy = 78.35%, epoch time = 32.20 s
Epoch 7: val loss = 0.7176, val accuracy = 75.10%, val

In [12]:
metrics = {
    "model_name": "ResNetV2-20",
    "framework_name": "PyTorch",
    "dataset": "CIFAR-10",
    "task": "classification",
    "total_training_time": end_train - start_train,
    "average_epoch_training_time": np.mean(train_epoch_times),
    "average_batch_inference_time": np.mean(eval_batch_times),
    "final_training_loss": train_metrics["loss"],
    "final_evaluation_accuracy": val_metrics["accuracy"],
    "final_test_accuracy": test_metrics["accuracy"]
}

In [13]:
print(metrics)

{'model_name': 'ResNetV2-20', 'framework_name': 'PyTorch', 'dataset': 'CIFAR-10', 'task': 'classification', 'total_training_time': 354.35734724998474, 'average_epoch_training_time': 32.761177444458006, 'average_batch_inference_time': 0.06674680054187775, 'final_training_loss': 0.524996715428477, 'final_evaluation_accuracy': 0.7588, 'final_test_accuracy': 0.7708}


In [15]:
import json
date_str = time.strftime("%Y-%m-%d-%H%M%S")
with open(f"./output/m2-pytorch-mlp-{date_str}.json", "w") as outfile:
    json.dump(metrics, outfile)