In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim

# Preprocessing applied to every image: convert to a tensor, then normalise
# each of the three channels with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 training split, served as shuffled mini-batches of 32 images.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=32, shuffle=True, num_workers=2
)

# CIFAR-10 test split, iterated in a fixed (unshuffled) order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=32, shuffle=False, num_workers=2
)
class MLP(nn.Module):
    """Fully connected classifier for 32x32 three-channel images.

    Every image is flattened to a 3072-element vector and passed through two
    ReLU-activated hidden layers (512 and 256 units) followed by a 10-unit
    output layer that returns raw class scores (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=32 * 32 * 3, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return the class scores, one row of 10 values per flattened image."""
        # Collapse everything into rows of 3072 values; the row count is inferred.
        hidden = x.view(-1, self.fc1.in_features)
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)

model = MLP()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Train for 10 epochs and report the mean per-batch training loss of each epoch.
model.train()
for epoch in range(10):
    print(f"For epoch {epoch+1}")
    running_loss = 0.0
    for inputs, labels in trainloader:
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average over the batches actually processed in this epoch. The earlier
    # version printed only once per epoch (at batch index 1562) yet divided the
    # accumulated loss by a hard-coded 2000, which under-reported the loss.
    print("Loss: ", running_loss / len(trainloader))

# Evaluate on the test split: count predictions that match the labels.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        # The index of the largest score is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy : {(correct/total):.2%}')


Files already downloaded and verified
Files already downloaded and verified
For epoch 1
Loss:  1.4903423233032227
For epoch 2
Loss:  1.2621796065568924
For epoch 3
Loss:  1.1717263000905513
For epoch 4
Loss:  1.1037519314289093
For epoch 5
Loss:  1.0480027922391892
For epoch 6
Loss:  1.001179738789797
For epoch 7
Loss:  0.9589280534982682
For epoch 8
Loss:  0.9197740755677223
For epoch 9
Loss:  0.8835287545919418
For epoch 10
Loss:  0.8486873773634434
Accuracy : 53.78%


# CNN IMPLEMENTATION

In [None]:
import torch
from torchvision import transforms
import torch.nn as nn
import torchvision.datasets as datasets
from torchvision.transforms import ToTensor, Normalize
import torch.nn.functional as F

class CNN(nn.Module):
    """Small convolutional classifier with 10 output scores.

    Two 5x5 convolutions (6 and 16 output channels), each followed by ReLU and
    2x2 max-pooling, feed three linear layers (400 -> 120 -> 84 -> 10). The
    400-value flatten corresponds to a 16x5x5 feature map, which is what a
    32x32 input produces after the two conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return raw class scores, one row of 10 values per image."""
        # Feature extractor: conv -> ReLU -> shared max-pool, applied twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Flatten the feature maps into rows of 400 values for the dense layers.
        x = x.view(-1, self.fc1.in_features)
        for dense in (self.fc1, self.fc2):
            x = F.relu(dense(x))
        return self.fc3(x)

# Hyper-parameters shared by the CNN cells below.
learning_rate = 0.001
epochs = 10

# Preprocessing: tensor conversion followed by per-channel normalisation.
# NOTE(review): the mean/std values look like CIFAR-10 channel statistics — confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.2154, 0.2024))
])

# Pass the final transform at construction time instead of building the
# datasets with a placeholder and overwriting `.transform` afterwards, so the
# dataset objects are configured consistently from the start.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

model = CNN()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

criterion = nn.CrossEntropyLoss()

def train(model, criterion, optimizer, train_loader):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimiser whose ``zero_grad``/``step`` are called once per
            batch.
        train_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        float: Mean of the per-batch loss values seen during the pass, or
        ``nan`` if the loader yields no batches. Callers that ignore the
        return value behave exactly as before.
    """
    model.train()
    loss_sum = 0.0
    num_batches = 0
    for images, labels in train_loader:
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Clear gradients left over from the previous batch before back-propagating.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        num_batches += 1

    if num_batches == 0:
        return float("nan")
    return loss_sum / num_batches

def test(model, criterion, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            test_loss += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100 * correct / total
    print('\nTest set for: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        test_loss, correct, total, accuracy))

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Build a freshly initialised CNN and an Adam optimiser bound to its
# parameters; re-running this cell replaces any previously trained `model`.
model = CNN()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Repeat `epochs` times: one training pass over train_loader, then one
# evaluation on test_loader (which prints the loss and accuracy).
for epoch in range(epochs):
    train(model, criterion, optimizer, train_loader)
    test(model, criterion, test_loader)


Test set: Average loss: 0.0225, Accuracy: 4767/10000 (47.67%)

Test set: Average loss: 0.0204, Accuracy: 5335/10000 (53.35%)

Test set: Average loss: 0.0191, Accuracy: 5693/10000 (56.93%)

Test set: Average loss: 0.0182, Accuracy: 5947/10000 (59.47%)

Test set: Average loss: 0.0175, Accuracy: 6097/10000 (60.97%)

Test set: Average loss: 0.0172, Accuracy: 6140/10000 (61.40%)

Test set: Average loss: 0.0170, Accuracy: 6259/10000 (62.59%)

Test set: Average loss: 0.0174, Accuracy: 6215/10000 (62.15%)

Test set: Average loss: 0.0166, Accuracy: 6367/10000 (63.67%)

Test set: Average loss: 0.0171, Accuracy: 6342/10000 (63.42%)


# VGG IMPLEMENTATION

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Am I using Gpu? ",torch.cuda.is_available())

# Images are resized to 224x224 and normalised with the ImageNet channel
# statistics, because the pretrained VGG-16 weights loaded below were trained
# on inputs preprocessed that way (a generic 0.5/0.5 scaling does not match).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

# NOTE(review): recent torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument; kept as-is so the cell still runs on the
# torchvision version this notebook was executed with.
vgg16 = torchvision.models.vgg16(pretrained=True)

# Replace the last classifier layer so the network outputs 10 scores.
num_features = vgg16.classifier[6].in_features
vgg16.classifier[6] = nn.Linear(num_features, 10)

vgg16 = vgg16.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)

# Fine-tune every layer for 10 epochs, logging the mean loss of each window of
# 200 mini-batches.
vgg16.train()
for epoch in range(10):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 200 == 199:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 200))
            running_loss = 0.0

# Evaluate on the test split, collecting predictions and labels for sklearn.
vgg16.eval()
correct = 0
total = 0
predicted_labels = []
true_labels = []

with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = vgg16(images)
        _, predicted = torch.max(outputs, 1)
        # Running tallies of evaluated samples and correct predictions.
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        predicted_labels.extend(predicted.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

# Accuracy is computed once, from the collected labels (the previous
# `correct / total` assignment was overwritten immediately and had no effect).
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy of VGG is: {accuracy:.2%}")


I am using  True
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:06<00:00, 25163140.15it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:05<00:00, 111MB/s] 


[1,   200] loss: 0.788
[1,   400] loss: 0.424
[1,   600] loss: 0.354
[2,   200] loss: 0.236
[2,   400] loss: 0.246
[2,   600] loss: 0.225
[3,   200] loss: 0.166
[3,   400] loss: 0.173
[3,   600] loss: 0.161
[4,   200] loss: 0.109
[4,   400] loss: 0.111
[4,   600] loss: 0.122
[5,   200] loss: 0.074
[5,   400] loss: 0.079
[5,   600] loss: 0.077
[6,   200] loss: 0.048
[6,   400] loss: 0.053
[6,   600] loss: 0.058
[7,   200] loss: 0.043
[7,   400] loss: 0.042
[7,   600] loss: 0.041
[8,   200] loss: 0.030
[8,   400] loss: 0.031
[8,   600] loss: 0.033
[9,   200] loss: 0.020
[9,   400] loss: 0.030
[9,   600] loss: 0.024
[10,   200] loss: 0.015
[10,   400] loss: 0.024
[10,   600] loss: 0.025
Finished Training
Accuracy of the network on the 10000 test images: 93.17 %
Accuracy: 0.9317


    MLP:
        Test set accuracy: 53.78%
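        Training loss: The printed loss starts at around 1.49 in the first epoch and gradually decreases to around 0.85 in the last epoch. (These recorded values were obtained by dividing each epoch's summed loss over 1,563 batches by 2,000, so the true per-batch mean is about 28% higher.)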

    CNN:
        Test set accuracy: 63.42%
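        Test loss: The final value is reported as 0.0171. (This figure is the sum of the per-batch mean losses divided by the 10,000 test images, so it is smaller than a per-batch mean by roughly the batch size of 64 and is not directly comparable with the MLP and VGG training losses.)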

    VGG:
        Test set accuracy: 93.17%
        Training loss: The running loss starts relatively high at 0.788 over the first 200 batches of epoch 1 and steadily decreases over training, reaching 0.015–0.025 in the last epoch.

CNNs leverage the spatial structure of images by using convolutional layers to detect patterns and features at different spatial hierarchies. These layers are designed to preserve the spatial relationships within the image, allowing CNNs to capture complex visual information more effectively compared to MLPs. In contrast, MLPs treat images as flattened vectors, ignoring spatial structure, which limits their ability to extract meaningful features from images. This fundamental difference in architecture accounts for CNNs' superior performance in tasks like image classification.

Transfer learning with the VGG model improves performance and reduces training time by leveraging pre-learned features from a large dataset (like ImageNet) and adapting them to a smaller target dataset (such as CIFAR-10). This approach saves time by avoiding the need to learn features from scratch and allows the model to quickly adapt to the target dataset's characteristics through fine-tuning. Overall, it offers a more efficient way to build high-performance models for image classification tasks.