# Load in and pre-process data

In [1]:
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

import os

# Expand "~" once, up front: torch.save / torch.load hand the path to the OS
# unchanged, so a literal "~" would not resolve to the home directory when the
# checkpoints are written under weights_path.
project_root = os.path.expanduser("~/code/personal/alexnet")
data_path = project_root + "/data"        # root directory handed to the CIFAR10 datasets
weights_path = project_root + "/weights"  # directory the per-epoch checkpoints are saved in

In [2]:
# Pre-processing applied to every image: convert to a tensor, then resize and
# centre-crop to 32x32, the spatial size the network's first linear layer
# (128 * 4 * 4 inputs after three 2x poolings) is built for.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((32)),
    transforms.CenterCrop((32, 32)),
])

train_dataset = CIFAR10(root=data_path, train=True, download=True, transform=transform)
test_dataset = CIFAR10(root=data_path, train=False, download=True, transform=transform)

# Take the class names from the training split that is already loaded rather
# than constructing a third CIFAR10 object only to read its `.classes`.
cifar10_classes = train_dataset.classes

# Only the training data is shuffled; the test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


# Define model architecture

In [3]:
from torch import nn

class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier sized for 3x32x32 images.

    Five 3x3 convolution stages (`l1`-`l5`) are followed by three fully
    connected layers (`fc1`-`fc3`). Stages `l1`, `l2` and `l5` end in a 2x2
    max-pool, so a 32x32 input is reduced to 16x16, 8x8 and finally 4x4;
    `fc1` therefore expects 128 * 4 * 4 features per sample.

    Args:
        num_classes: Size of the final linear layer, i.e. the number of
            logits produced per sample.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Attribute names and the order of layers inside each Sequential
        # determine the state_dict keys, so they are kept stable for
        # compatibility with saved checkpoints.
        self.l1 = nn.Sequential(
            nn.Conv2d(3, 48, 3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=48),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.l2 = nn.Sequential(
            nn.Conv2d(48, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.l3 = nn.Sequential(
            nn.Conv2d(128, 192, 3, stride=1, padding=1),
            nn.ReLU(),
        )
        self.l4 = nn.Sequential(
            nn.Conv2d(192, 192, 3, stride=1, padding=1),
            nn.ReLU(),
        )
        self.l5 = nn.Sequential(
            nn.Conv2d(192, 128, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.fc1 = nn.Sequential(
            # 128 channels x 4 x 4 spatial positions left after the third pool.
            nn.Linear(128 * 4 * 4, 512),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(512, 512),
            nn.ReLU(),
        )
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch of images.

        Args:
            x: Image batch with 3 channels; the linear head requires a
                32x32 spatial size.

        Returns:
            Tensor with one row of `num_classes` logits per input sample.

        Raises:
            RuntimeError: If the spatial size of `x` does not reduce to
                128 * 4 * 4 features (raised by the first linear layer).
        """
        for stage in (self.l1, self.l2, self.l3, self.l4, self.l5):
            x = stage(x)

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 128 * 4 * 4), this never folds extra spatial positions into
        # the batch dimension: a wrongly sized input fails loudly at fc1
        # instead of silently returning more rows than samples.
        x = x.flatten(start_dim=1)

        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)

        return x


# Train model

In [4]:
# Seed PyTorch's global RNG before building the network so the randomly
# initialised weights are the same on every fresh run of the notebook.
SEED = 42
torch.manual_seed(SEED)

alexnet = AlexNet()

In [5]:
import torch.optim as optim

# Training objective and update rule: cross-entropy on the model's logits,
# optimised with momentum SGD plus weight decay over all model parameters.
criterion = torch.nn.CrossEntropyLoss()
optimiser = optim.SGD(
    alexnet.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=0.005,
)

In [6]:
import torch

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

# Move the model's parameters to the chosen device. The call is the cell's last
# expression, so the notebook also displays the module structure.
alexnet.to(device)

Device: cuda


AlexNet(
  (l1): Sequential(
    (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (l2): Sequential(
    (0): Conv2d(48, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (l3): Sequential(
    (0): Conv2d(128, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (l4): Sequential(
    (0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
  )
  (l5): Sequential(
    (0): Conv2d(192, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, 

In [None]:
from liveloss.liveloss import LiveLoss

import os

l = LiveLoss()

num_epochs = 25

# torch.save neither expands "~" nor creates missing directories, so resolve
# the checkpoint directory and make sure it exists before the first save.
checkpoint_dir = os.path.expanduser(weights_path)
os.makedirs(checkpoint_dir, exist_ok=True)

# Put the model in training mode explicitly (BatchNorm then uses batch
# statistics), in case it was switched to eval() earlier in the session.
alexnet.train()

for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to GPU

        optimiser.zero_grad()

        outputs = alexnet(inputs)
        # Flatten the labels to 1-D with reshape(-1) instead of squeeze():
        # squeeze() would collapse a single-sample batch to a 0-dim tensor,
        # which no longer lines up with the (batch, classes) logits.
        labels = labels.reshape(-1)

        loss = criterion(outputs, labels)
        loss.backward()
        optimiser.step()

        running_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    epoch_loss = running_loss / len(train_loader)
    l.send(epoch+1, epoch_loss)
    print(f"Current loss: {epoch_loss:.3f}")

    # Keep one checkpoint per epoch so any epoch can be reloaded later.
    model_path = checkpoint_dir + f'/alexnet_epoch_{epoch+1}.pth'
    torch.save(alexnet.state_dict(), model_path)
    print(f'Model saved at epoch {epoch+1}.')

print('Finished Training')

# Validation and inference

In [None]:
import numpy as np

import os

# Choose the device first so the checkpoint can be mapped directly onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the final-epoch weights from the same directory the training loop saves
# to (weights_path). map_location lets a checkpoint written on the GPU be
# loaded on a CPU-only machine as well.
checkpoint_path = os.path.expanduser(weights_path) + '/alexnet_epoch_25.pth'
alexnet = AlexNet()
alexnet.load_state_dict(torch.load(checkpoint_path, map_location=device))
alexnet.to(device)
# Inference mode: BatchNorm uses its running statistics instead of batch statistics.
alexnet.eval()

# Get a batch from the test loader
test_iter = iter(test_loader)
sample_inputs, sample_labels = next(test_iter)
sample_inputs, sample_labels = sample_inputs.to(device), sample_labels.to(device)

# Perform inference
with torch.no_grad():
    output = alexnet(sample_inputs)

# Convert the logits to per-class probabilities
probabilities = torch.nn.functional.softmax(output, dim=1)

# Get the predicted classes
predicted_classes = torch.argmax(probabilities, dim=1)

predicted_classes = predicted_classes.cpu().numpy()
sample_labels = sample_labels.cpu().numpy()

print(predicted_classes)
print(sample_labels)

# Element-wise match between predictions and labels
common_elements = np.equal(predicted_classes, sample_labels)

# Share of matching predictions in this batch, i.e. the batch accuracy in percent
percentage_common = np.sum(common_elements) / len(predicted_classes) * 100

print(f"Percentage of common elements: {percentage_common:.2f}%")

In [None]:
def get_accuracy_for_batch(sample_inputs, sample_labels) -> float:
    """
    Run the module-level `alexnet` on one batch and return its accuracy in percent.

    Both tensors are moved to the module-level `device` before inference.

    Args:
        sample_inputs: Batch of input images.
        sample_labels: Ground-truth class indices for the batch.

    Returns:
        Percentage (0-100) of samples whose highest-scoring class equals the label.
    """
    batch_inputs = sample_inputs.to(device)
    batch_labels = sample_labels.to(device)

    # No gradients are needed for evaluation.
    with torch.no_grad():
        logits = alexnet(batch_inputs)

    # Softmax over the class dimension, then take the most probable class.
    class_probs = torch.nn.functional.softmax(logits, dim=1)
    predictions = torch.argmax(class_probs, dim=1).cpu().numpy()
    targets = batch_labels.cpu().numpy()

    # Boolean mask of correct predictions -> share of hits, scaled to percent.
    hits = np.equal(predictions, targets)
    return np.sum(hits) / len(predictions) * 100


# Evaluate every batch of the test loader. Each batch accuracy is weighted by
# the batch size, so a smaller final batch does not skew the overall figure
# the way a plain mean of per-batch accuracies would.
accuracies = []
batch_sizes = []

for sample_inputs, sample_labels in test_loader:
    accuracies.append(get_accuracy_for_batch(sample_inputs, sample_labels))
    batch_sizes.append(len(sample_labels))

weighted_total = sum(acc * size for acc, size in zip(accuracies, batch_sizes))
print(weighted_total / sum(batch_sizes))

Result: the trained model reaches 66.79% accuracy on the CIFAR-10 test split.