### M7 Bonus 
#### Training a Convolutional Neural Net on the CIFAR-10 Dataset

In [6]:
# importing libraries
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [7]:
# Pick the compute device: prefer a CUDA GPU, then Apple-silicon MPS, and
# fall back to the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(device)

mps


In [8]:
# Build the CIFAR-10 train/test datasets with a shared preprocessing pipeline.

# Convert each image to a tensor, then normalize every channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
    ]
)

# Training split; downloaded on first use.
cifar10_training_data = datasets.CIFAR10(
    root='./data/cifar10',
    train=True,
    transform=transform,
    download=True,
)

# Test split; read from the same root directory (no download requested).
cifar10_testing_data = datasets.CIFAR10(
    root='./data/cifar10',
    train=False,
    transform=transform,
)

Files already downloaded and verified


In [9]:
# defining cnn model

class ConvNeuralNet(nn.Module):
    """
    Convolutional neural network for classifying CIFAR-10 images.

    The feature extractor consists of three convolutional blocks. Every
    convolution uses a 3x3 kernel with padding 1 (spatial size preserved) and
    every block ends in a 2x2 max-pool (spatial size halved). A 3x32x32 input
    therefore leaves the extractor as 256x4x4 = 4096 features, which is the
    input width hard-coded into the first fully connected layer; other input
    resolutions will not match that layer.

    Args:
        num_classes (int): Number of output logits. Defaults to 10, the
            number of CIFAR-10 classes.

    Attributes:
        convolution_layer (nn.Sequential): Convolutions, batch normalization
            (after the first convolution of each block), ReLU activations,
            max-pooling, and a light 2D dropout after the second block.
        fullyconnected_layer (nn.Sequential): Three linear layers with ReLU
            activations and dropout for regularization; the last layer emits
            raw, un-normalized class scores.
    """
    def __init__(self, num_classes=10):
        super().__init__()

        self.convolution_layer = nn.Sequential(
            # first block : (input: 3 channels, output: 64 channels)
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # second block : (input: 64 channels, output: 128 channels)
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05),

            # third block : (input: 128 channels, output: 256 channels)
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fullyconnected_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(4096, 1024),  # (input: 256*4*4 = 4096 features, output: 1024 features)
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),   # (input: 1024 features, output: 512 features)
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(512, num_classes)  # final layer (input: 512 features, output: one logit per class)
        )

    def forward(self, x):
        """
        Compute class logits for a batch of images.

        Args:
            x (torch.Tensor): Image batch with the batch dimension first and
                3 channels; 32x32 spatial size is required for the flattened
                features to match the first linear layer.

        Returns:
            torch.Tensor: Logits of shape (batch, num_classes).
        """
        # convolution
        x = self.convolution_layer(x)

        # Flatten everything except the batch dimension. torch.flatten copies
        # when needed, so unlike .view it also accepts non-contiguous
        # activations (for example channels_last tensors).
        x = torch.flatten(x, start_dim=1)

        # fully connected layer
        x = self.fullyconnected_layer(x)

        return x
    
# model = ConvNeuralNet()
# print(model)

In [10]:
# training function
def training(model, train_dataloader, optimizer, criterion, epoch, device, print_freq=10):
    """
    Run one training epoch and return the average loss over that epoch.

    Args:
        model (nn.Module): Network to optimize; moved to ``device`` and put
            into training mode.
        train_dataloader: Iterable of ``(data, labels)`` batches. It must
            support ``len()`` when progress printing is enabled.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called once per
            batch.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. The returned average is a per-sample mean only if
            the criterion itself averages over the batch.
        epoch (int): Epoch number, used only in the progress output.
        device (torch.device): Device the model and each batch are moved to.
        print_freq (int): Print the batch loss every ``print_freq`` batches.
            A falsy value (0 or None) disables per-batch printing.

    Returns:
        float: Batch losses weighted by batch size, divided by the number of
        samples actually processed. NaN if the loader yielded no samples.
    """
    model.to(device)
    model.train()
    training_loss = 0.0
    samples_seen = 0
    print(f"Epoch: {epoch}")
    for batch_idx, (data, labels) in enumerate(train_dataloader):
        data, labels = data.to(device), labels.to(device)

        # zero gradients
        optimizer.zero_grad()

        # propagation
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar once: every .item() call copies from the device.
        batch_loss = loss.item()
        batch_size = data.shape[0]
        training_loss += batch_loss * batch_size
        samples_seen += batch_size

        # print stats
        if print_freq and batch_idx % print_freq == 0:
            print(f"Batch: {batch_idx}/{len(train_dataloader)} | ",
                  f"Training Loss: {batch_loss:.4f} | "
            )

    # Divide by the samples that were really used rather than the dataset
    # size: the two differ when the loader drops the last partial batch or
    # samples only a subset.
    if samples_seen == 0:
        return float("nan")
    return training_loss / samples_seen

# testing function
def testing(model, test_dataloader, criterion, device):
    model.to(device)
    model.eval()

    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in test_dataloader:
            # move data and target to the specified device
            data, target = data.to(device), target.to(device)

            # forward pass 
            output = model(data)

            # calculate loss
            test_loss += criterion(output, target).item() * data.size(0)

            # get predictions
            _, predicted = torch.max(output, 1)
            correct += (predicted == target).sum().item()
            total += target.size(0)

    # average loss
    average_loss = test_loss / total
    # calculate accuracy
    accuracy = correct / total * 100

    print(f'Test Loss: {average_loss:.4f}, Accuracy: {accuracy:.2f}%')
    
    return average_loss, accuracy

In [11]:
def train_test(model, train_dataloader, test_dataloader, optimizer, criterion, num_epochs, device):
    """
    Train the model for ``num_epochs`` epochs, then evaluate it once.

    Args:
        model (nn.Module): Network to train and evaluate.
        train_dataloader: Batches passed to ``training`` every epoch.
        test_dataloader: Batches passed to ``testing`` after the last epoch.
        optimizer: Optimizer forwarded to ``training``.
        criterion: Loss callable forwarded to both ``training`` and ``testing``.
        num_epochs (int): Number of training epochs; epochs are numbered
            from 1. With a value below 1 no training happens and the model is
            only evaluated.
        device (torch.device): Device forwarded to ``training`` and ``testing``.

    Returns:
        tuple: ``(train_loss, test_loss, test_accuracy)``. ``train_loss`` is
        the value returned by ``training`` for the final epoch, or NaN when
        no epoch was run.
    """
    # Placeholder so that num_epochs < 1 returns cleanly instead of failing
    # with UnboundLocalError at the return statement.
    train_loss = float("nan")

    for epoch in range(1, num_epochs+1):
        # train the model
        train_loss = training(model, train_dataloader, optimizer, criterion, epoch, device)

    # test the model once, after the final epoch
    test_loss, test_accuracy = testing(model, test_dataloader, criterion, device)

    return train_loss, test_loss, test_accuracy


# init params
batch_size = 128
epochs = 10
learning_rate = 0.001
momentum = 0.9  # only relevant for SGD; the Adam optimizer below does not use it
seed = 42

# Seed PyTorch's global RNG before anything random happens so weight
# initialization, dropout and shuffling start from a fixed state. Exact
# repeatability can still depend on the backend's kernels.
torch.manual_seed(seed)

# load cifar-10 dataloaders

# trainloader (reshuffled every epoch)
cifar_trainloader = DataLoader(
    cifar10_training_data,
    batch_size=batch_size,
    shuffle=True
)

# testloader (fixed order)
cifar_testloader = DataLoader(
    cifar10_testing_data,
    batch_size=batch_size,
    shuffle=False
)

# init model, optimizer, criterion -- built exactly once, so the optimizer is
# guaranteed to hold the parameters of the model that is actually trained
model = ConvNeuralNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# call train_test
train_loss, test_loss, test_accuracy = train_test(
    model,
    cifar_trainloader,
    cifar_testloader,
    optimizer,
    criterion,
    epochs,
    device
)

print(f"Final Training Loss: {train_loss:.4f}, Final Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

Epoch: 1
Batch: 0/391 |  Training Loss: 2.3016 | 
Batch: 10/391 |  Training Loss: 2.0081 | 
Batch: 20/391 |  Training Loss: 1.9946 | 
Batch: 30/391 |  Training Loss: 1.7033 | 
Batch: 40/391 |  Training Loss: 1.8636 | 
Batch: 50/391 |  Training Loss: 1.6799 | 
Batch: 60/391 |  Training Loss: 1.5870 | 
Batch: 70/391 |  Training Loss: 1.6619 | 
Batch: 80/391 |  Training Loss: 1.5527 | 
Batch: 90/391 |  Training Loss: 1.3482 | 
Batch: 100/391 |  Training Loss: 1.5115 | 
Batch: 110/391 |  Training Loss: 1.5210 | 
Batch: 120/391 |  Training Loss: 1.3608 | 
Batch: 130/391 |  Training Loss: 1.4018 | 
Batch: 140/391 |  Training Loss: 1.3567 | 
Batch: 150/391 |  Training Loss: 1.4488 | 
Batch: 160/391 |  Training Loss: 1.3592 | 
Batch: 170/391 |  Training Loss: 1.3173 | 
Batch: 180/391 |  Training Loss: 1.2713 | 
Batch: 190/391 |  Training Loss: 1.3970 | 
Batch: 200/391 |  Training Loss: 1.1959 | 
Batch: 210/391 |  Training Loss: 1.3212 | 
Batch: 220/391 |  Training Loss: 1.4122 | 
Batch: 230/39

Results without re-implementing a ResNet architecture or adding data augmentation:

Run 1 Results:<br>
(batch size = 64)<br>
Final Training Loss: 0.2981, Final Test Loss: 0.6369, Test Accuracy: 80.62%

Run 2 Results:<br>
(batch size = 64)<br>
Final Training Loss: 0.3409, Final Test Loss: 0.5919, Test Accuracy: 81.60%

Run 3 Results:<br>
(batch size = 128)<br>
Final Training Loss: 0.2351, Final Test Loss: 0.6708, Test Accuracy: 81.02%