### M7 Bonus 
#### Training a Convolutional Neural Net on the CIFAR-10 Dataset

In [1]:
# importing libraries
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [2]:
# select the compute device: first CUDA GPU when one is available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [3]:
# CIFAR-10 train and test splits, both run through the same preprocessing pipeline

# tensor conversion followed by per-channel normalization (mean 0.5, std 0.5 for each of 3 channels)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# training split; downloaded into the data directory when requested
cifar10_training_data = datasets.CIFAR10(
    root='./data/cifar10',
    train=True,
    transform=transform,
    download=True,
)

# test split; no download is requested here, it reads from the same root directory
cifar10_testing_data = datasets.CIFAR10(
    root='./data/cifar10',
    train=False,
    transform=transform,
)

Files already downloaded and verified


In [4]:
# defining cnn model

class ConvNeuralNet(nn.Module):
    """Three-block convolutional classifier for 3-channel 32x32 images.

    Each convolutional block is conv -> batch norm -> ReLU -> conv -> ReLU ->
    2x2 max pool. The three pools halve the spatial size three times, so a
    32x32 input reaches the classifier head as 256 feature maps of 4x4
    (4096 features after flattening). Inputs of any other spatial size will
    not match the first linear layer.

    Args:
        num_classes: Number of output logits. Defaults to 10 (CIFAR-10).
    """

    def __init__(self, num_classes=10):
        super().__init__()

        self.convolution_layer = nn.Sequential(
            # first block: 3 -> 32 -> 64 channels, 32x32 -> 16x16
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # second block: 64 -> 128 -> 128 channels, 16x16 -> 8x8
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout2d(p=0.05),

            # third block: 128 -> 256 -> 256 channels, 8x8 -> 4x4
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fullyconnected_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            # 256 channels * 4 * 4 spatial positions = 4096 input features
            nn.Linear(256 * 4 * 4, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.1),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images."""
        # convolutional feature extraction
        x = self.convolution_layer(x)

        # flatten everything except the batch dimension; unlike .view this also
        # works when the activations are not contiguous (e.g. channels_last input)
        x = x.flatten(start_dim=1)

        # classifier head
        return self.fullyconnected_layer(x)
    
# model = ConvNeuralNet()
# print(model)

In [5]:
# training function
def training(model, train_dataloader, optimizer, criterion, epoch, device, print_freq=10):
    """Train ``model`` for one epoch and return the mean per-sample loss.

    Args:
        model: Network to optimize; moved to ``device`` and put in train mode.
        train_dataloader: Iterable yielding ``(data, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable applied as ``criterion(outputs, labels)``.
        epoch: Epoch number, used only for the progress header.
        device: Device the model and every batch are moved to.
        print_freq: Print the batch loss every ``print_freq`` batches.
            ``0`` or ``None`` disables per-batch printing.

    Returns:
        Sum of (batch loss * batch size) divided by the number of samples
        actually processed, or NaN when the loader yields no batches.
    """
    model.to(device)
    model.train()

    running_loss = 0.0
    samples_seen = 0
    print(f"Epoch: {epoch}")
    for batch_idx, (data, labels) in enumerate(train_dataloader):
        data, labels = data.to(device), labels.to(device)

        # zero gradients
        optimizer.zero_grad()

        # forward pass, backward pass, parameter update
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # weight the batch loss by the batch size so a smaller last batch
        # contributes proportionally to the epoch average
        batch_loss = loss.item()
        n_samples = data.shape[0]
        running_loss += batch_loss * n_samples
        samples_seen += n_samples

        # a falsy print_freq (0 / None) turns progress printing off instead of
        # raising on the modulo
        if print_freq and not (batch_idx % print_freq):
            print(f"Batch: {batch_idx}/{len(train_dataloader)} | ",
                  f"Training Loss: {batch_loss:.4f} | "
            )

    # normalize by the samples seen rather than len(dataset): the two differ
    # when the loader drops the last batch or samples a subset
    if samples_seen == 0:
        return float("nan")
    return running_loss / samples_seen

# testing function
def testing(model, test_dataloader, criterion, device):
    model.to(device)
    model.eval()

    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in test_dataloader:
            # move data and target to the specified device
            data, target = data.to(device), target.to(device)

            # forward pass 
            output = model(data)

            # calculate loss
            test_loss += criterion(output, target).item() * data.size(0)

            # get predictions
            _, predicted = torch.max(output, 1)
            correct += (predicted == target).sum().item()
            total += target.size(0)

    # average loss
    average_loss = test_loss / total
    # calculate accuracy
    accuracy = correct / total * 100

    print(f'Test Loss: {average_loss:.4f}, Accuracy: {accuracy:.2f}%')
    
    return average_loss, accuracy

In [6]:
def train_test(model, train_dataloader, test_dataloader, optimizer, criterion, num_epochs, device):
    """Train for ``num_epochs`` epochs, then evaluate once on the test loader.

    Args:
        model: Network passed to ``training`` and ``testing``.
        train_dataloader: Loader passed to ``training`` each epoch.
        test_dataloader: Loader passed to ``testing`` after the last epoch.
        optimizer: Optimizer passed to ``training``.
        criterion: Loss callable shared by training and evaluation.
        num_epochs: Number of training epochs; epochs are numbered from 1.
        device: Device passed through to ``training`` and ``testing``.

    Returns:
        ``(train_loss, test_loss, test_accuracy)`` where ``train_loss`` is the
        value returned by the final training epoch, or NaN when
        ``num_epochs`` is less than 1 and no training ran.
    """
    # defined up front so the return below does not hit an unbound name when
    # the loop body never executes
    train_loss = float("nan")
    for epoch in range(1, num_epochs + 1):
        # train the model
        train_loss = training(model, train_dataloader, optimizer, criterion, epoch, device)

    # evaluate once, after all training epochs have finished
    test_loss, test_accuracy = testing(model, test_dataloader, criterion, device)

    return train_loss, test_loss, test_accuracy


# init params
batch_size = 64
epochs = 10
learning_rate = 0.001
momentum = 0.9  # only relevant to SGD; not used by the Adam optimizer below
seed = 42

# seed the global torch RNG so weight initialization and the shuffled batch
# order are repeatable across runs
torch.manual_seed(seed)

# load cifar-10 dataloaders

# trainloader: reshuffled every epoch
cifar_trainloader = DataLoader(
    cifar10_training_data,
    batch_size=batch_size,
    shuffle=True
)

# testloader: fixed order
cifar_testloader = DataLoader(
    cifar10_testing_data,
    batch_size=batch_size,
    shuffle=False
)

# Initialize the model, criterion and optimizer once. The earlier SGD setup
# was overwritten before it was ever used, so only the Adam setup is kept.
model = ConvNeuralNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Call the train_test function
train_loss, test_loss, test_accuracy = train_test(
    model, 
    cifar_trainloader, 
    cifar_testloader, 
    optimizer, 
    criterion, 
    epochs, 
    device
)

# Print final results
print(f"Final Training Loss: {train_loss:.4f}, Final Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

Epoch: 1
Batch: 0/782 |  Training Loss: 2.2971 | 
Batch: 10/782 |  Training Loss: 2.2938 | 
Batch: 20/782 |  Training Loss: 2.1153 | 
Batch: 30/782 |  Training Loss: 1.7905 | 
Batch: 40/782 |  Training Loss: 2.0506 | 
Batch: 50/782 |  Training Loss: 1.9280 | 
Batch: 60/782 |  Training Loss: 1.7751 | 
Batch: 70/782 |  Training Loss: 1.8771 | 
Batch: 80/782 |  Training Loss: 1.9234 | 
Batch: 90/782 |  Training Loss: 1.8266 | 
Batch: 100/782 |  Training Loss: 1.6117 | 
Batch: 110/782 |  Training Loss: 1.4725 | 
Batch: 120/782 |  Training Loss: 1.5832 | 
Batch: 130/782 |  Training Loss: 1.6139 | 
Batch: 140/782 |  Training Loss: 1.7820 | 
Batch: 150/782 |  Training Loss: 1.6205 | 
Batch: 160/782 |  Training Loss: 1.5618 | 
Batch: 170/782 |  Training Loss: 1.5914 | 
Batch: 180/782 |  Training Loss: 1.6321 | 
Batch: 190/782 |  Training Loss: 1.6264 | 
Batch: 200/782 |  Training Loss: 1.4951 | 
Batch: 210/782 |  Training Loss: 1.4787 | 
Batch: 220/782 |  Training Loss: 1.4724 | 
Batch: 230/78

' \nRun 1 Results:\nTest Loss: 0.6369, Accuracy: 80.62%\nFinal Training Loss: 0.2981, Final Test Loss: 0.6369, Test Accuracy: 80.62%\n'

Run 1 Results:<br>
Final Training Loss: 0.2981, Final Test Loss: 0.6369, Test Accuracy: 80.62%

Run 2 Results:<br>
Final Training Loss: 0.3409, Final Test Loss: 0.5919, Test Accuracy: 81.60%