In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchinfo import summary

# Seed PyTorch's global RNG before anything random happens so that model weight
# initialisation and the shuffled batch order of the training DataLoader are
# repeatable across "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

# Transformations for CIFAR-10: convert each image to a tensor, then normalise
# every RGB channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize to [-1, 1]
])

# Load CIFAR-10 dataset (downloaded into ./data on first use)
batch_size = 64

# Training split: reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

# Test split: fixed order, used only for evaluation.
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

# Class names as exposed by the dataset object.
classes = trainset.classes

## Fully-connected network

In [None]:
class FullyConnectedNet(nn.Module):
	"""Plain multilayer perceptron for 3x32x32 images.

	Layer widths are 3072 -> 512 -> 512 -> 128 -> 10. A ReLU follows each of
	the first three linear layers; the last layer's output is returned as-is
	(no softmax is applied here).
	"""

	def __init__(self):
		super().__init__()
		flat_features = 3 * 32 * 32
		self.fc1 = nn.Linear(flat_features, 512)
		self.fc2 = nn.Linear(512, 512)
		self.fc3 = nn.Linear(512, 128)
		self.fc4 = nn.Linear(128, 10)

	def forward(self, x):
		"""Flatten each sample of ``x`` and return the 10 output scores per sample."""
		# Collapse everything except the batch dimension into one feature axis.
		hidden = x.view(x.shape[0], -1)
		for layer in (self.fc1, self.fc2, self.fc3):
			hidden = torch.relu(layer(hidden))
		return self.fc4(hidden)

## Train and test functions

In [None]:
def evaluate_model(model, dataloader):
	"""Return the fraction of samples in ``dataloader`` that ``model`` classifies correctly.

	Args:
		model: ``nn.Module`` whose output has one score per class along dim 1.
		dataloader: iterable yielding ``(inputs, labels)`` batches.

	Returns:
		``correct / total`` as a float in [0, 1].

	Raises:
		ZeroDivisionError: if ``dataloader`` yields no samples.

	The model is put in eval mode for the duration of the call and then
	returned to whatever mode (train or eval) it was in on entry, even if
	the forward pass raises.
	"""
	# Remember the caller's mode so evaluation does not silently flip an
	# eval-mode model back into training mode.
	was_training = model.training
	model.eval()
	correct = 0
	total = 0
	try:
		with torch.no_grad():
			for inputs, labels in dataloader:
				outputs = model(inputs)
				# Predicted class = index of the largest score along dim 1.
				_, predicted = torch.max(outputs, 1)
				total += labels.size(0)
				correct += (predicted == labels).sum().item()
	finally:
		model.train(was_training)
	return correct / total

def train_test(model, criterion, optimizer, epochs=5):
	"""Train ``model`` on the module-level ``trainloader`` and evaluate it on ``testloader`` after every epoch.

	Args:
		model: network to optimise (updated in place).
		criterion: loss callable invoked as ``criterion(outputs, labels)``.
		optimizer: optimizer already bound to ``model``'s parameters.
		epochs: number of full passes over ``trainloader``.

	Returns:
		Tuple ``(train_accuracies, test_accuracies)`` of NumPy arrays with one
		entry per epoch, each a fraction in [0, 1]. Training accuracy is
		accumulated from the outputs computed during the epoch's forward
		passes; test accuracy comes from ``evaluate_model``.

	A one-line summary (mean batch loss, both accuracies) is printed per epoch.
	"""
	train_accuracies = []
	test_accuracies = []
	for epoch in range(epochs):
		# Enter training mode explicitly instead of relying on the mode the
		# caller (or a previous evaluation pass) happened to leave the model in.
		model.train()
		running_loss = 0.0
		correct_train = 0
		total_train = 0
		for inputs, labels in trainloader:
			optimizer.zero_grad()
			outputs = model(inputs)
			loss = criterion(outputs, labels)
			loss.backward()
			optimizer.step()
			running_loss += loss.item()

			# Calculate training accuracy per batch
			_, predicted = torch.max(outputs, 1)
			total_train += labels.size(0)
			correct_train += (predicted == labels).sum().item()

		# Calculate epoch training accuracy
		train_accuracy = correct_train / total_train
		train_accuracies.append(train_accuracy)

		# Evaluate the model on the test dataset
		test_accuracy = evaluate_model(model, testloader)
		test_accuracies.append(test_accuracy)
		print(f"Epoch {epoch + 1}: Training Loss = {running_loss / len(trainloader):.4f}, Training Accuracy = {train_accuracy * 100:.2f}%, Test Accuracy = {test_accuracy * 100:.2f}%")
	return np.array(train_accuracies), np.array(test_accuracies)

In [None]:
def plot_accuracy(train_accuracies, test_accuracies, epochs, title):
    """Plot per-epoch training and test accuracy as percentages.

    Args:
        train_accuracies: sequence or array of training accuracies as
            fractions, one per epoch.
        test_accuracies: sequence or array of test accuracies as fractions,
            one per epoch.
        epochs: number of epochs; the x axis runs from 1 to ``epochs``.
        title: prefix for the figure title (e.g. the model name).
    """
    # Convert up front so plain Python lists work too: `some_list * 100` would
    # repeat the list 100 times instead of scaling its values.
    train_pct = np.asarray(train_accuracies, dtype=float) * 100
    test_pct = np.asarray(test_accuracies, dtype=float) * 100
    epoch_axis = range(1, epochs + 1)

    plt.figure(figsize=(10, 6))
    # Fixed 0-100 range keeps plots of different models directly comparable.
    plt.ylim(0, 100)
    plt.plot(epoch_axis, train_pct, label='Training Accuracy', marker='o')
    plt.plot(epoch_axis, test_pct, label='Test Accuracy', marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')  # values are plotted in percent
    plt.title(f'{title} - Training and Test Accuracy')
    plt.legend()
    plt.grid()
    plt.show()

## Instantiate fully-connected network

In [None]:
# Build the fully-connected baseline; the bare `fc_model` expression on the last
# line makes the notebook display the module's layer listing.
fc_model = FullyConnectedNet()
# Optional torchinfo report. The input must be 3x32x32 to match fc1's
# 3 * 32 * 32 input features (a 64x64 input would not fit the first layer):
# summary(fc_model, input_size=[1, 3, 32, 32]) # single image batch
fc_model

## Small Convolutional Neural Network

In [None]:
class ConvNet3(nn.Module):
    """Small CNN: three 3x3 convolutions (3 -> 32 -> 64 -> 128 channels).

    Every convolution keeps the spatial size (padding=1) and is followed by a
    ReLU and a 2x2 max-pool, so a 32x32 input shrinks to 4x4 before the single
    linear layer that produces the 10 output scores.
    """

    def __init__(self):
        super().__init__()
        conv_opts = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(3, 32, **conv_opts)
        self.conv2 = nn.Conv2d(32, 64, **conv_opts)
        self.conv3 = nn.Conv2d(64, 128, **conv_opts)
        # 128 channels on a 4x4 grid remain after three halvings of 32x32.
        self.fc1 = nn.Linear(128 * 4 * 4, 10)

    def forward(self, x):
        """Run the three conv/ReLU/pool stages, flatten, and classify."""
        features = x
        for conv in (self.conv1, self.conv2, self.conv3):
            features = torch.max_pool2d(torch.relu(conv(features)), 2)
        flat = features.view(features.shape[0], -1)
        return self.fc1(flat)

In [None]:
# Build the small CNN; the bare `conv3_model` expression on the last line makes
# the notebook display the module's layer listing.
conv3_model = ConvNet3()
# Optional torchinfo report. ConvNet3.fc1 expects 128 * 4 * 4 features, which
# after three 2x2 poolings corresponds to 32x32 inputs (not 64x64):
# summary(conv3_model, input_size=[1, 3, 32, 32])
conv3_model

## Fancy VGG-style CNN

In [None]:
class VGGStyleNet(nn.Module):
    """VGG-style CNN built from three double-convolution blocks.

    Each block is Conv3x3 -> ReLU -> Conv3x3 -> ReLU -> MaxPool(2), with the
    channel count going 3 -> 64 -> 128 -> 256. The three poolings reduce a
    32x32 input to 4x4, and the classifier head maps the flattened
    256 * 4 * 4 features to 1024 hidden units and then to 10 output scores.
    """

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return one Conv-ReLU-Conv-ReLU-MaxPool stage as an ``nn.Sequential``."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def __init__(self):
        super().__init__()
        self.conv_block1 = self._conv_block(3, 64)
        self.conv_block2 = self._conv_block(64, 128)
        self.conv_block3 = self._conv_block(128, 256)
        self.fc = nn.Sequential(
            nn.Linear(256 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 10),
        )

    def forward(self, x):
        """Apply the three conv blocks, flatten per sample, and classify."""
        features = self.conv_block3(self.conv_block2(self.conv_block1(x)))
        flat = features.view(features.shape[0], -1)
        return self.fc(flat)


In [None]:
# Build the VGG-style CNN; the bare `vgg_model` expression on the last line
# makes the notebook display the module's layer listing.
vgg_model = VGGStyleNet()
# Optional torchinfo report. The classifier head expects 256 * 4 * 4 features,
# which after three 2x2 poolings corresponds to 32x32 inputs (not 64x64):
# summary(vgg_model, input_size=[1, 3, 32, 32]) # single image batch
vgg_model

## Train models

In [None]:
# Shared loss for all three classifiers.
criterion = nn.CrossEntropyLoss()
# Networks to compare, paired by position with the labels used in plot titles.
models = [fc_model, conv3_model, vgg_model]
model_names = ["Fully-connected Model", "3-layer CNN", "VGG-style CNN"]
EPOCHS = 10

# Train each network with its own Adam optimizer, then plot its accuracy curves.
for i, net in enumerate(models):
	optimizer = optim.Adam(net.parameters(), lr=0.001)
	train_accuracies, test_accuracies = train_test(net, criterion, optimizer, EPOCHS)
	plot_accuracy(train_accuracies, test_accuracies, EPOCHS, model_names[i])