Nishank Bhowal - PES2UG22CS366
Trisha Gupta - PES2UG22CS630
Yash Swarup - PES2UG22CS676

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

In [None]:
# Run on the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Preprocessing pipelines: resize every image to 224x224, then convert it to a tensor.
# NOTE(review): the parameter counts recorded in the results table further down
# (e.g. 61,706 for LeNet-5 on MNIST) appear consistent with native-resolution inputs
# (28x28 / 32x32) rather than 224x224 -- confirm which resolution the recorded runs used.
transform_mnist = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize MNIST images to 224x224
    transforms.ToTensor(),
])
transform_cifar = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize CIFAR-10 images to 224x224
    transforms.ToTensor(),
])

# Train / test splits; files are downloaded into ./data when they are not already there.
train_dataset_mnist = torchvision.datasets.MNIST(root='./data', train=True, transform=transform_mnist, download=True)
test_dataset_mnist = torchvision.datasets.MNIST(root='./data', train=False, transform=transform_mnist, download=True)

train_dataset_cifar = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform_cifar, download=True)
test_dataset_cifar = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform_cifar, download=True)

batch_size = 128
# The loaders must wrap the dataset objects defined above. Only the training loaders are
# shuffled; the test loaders keep a fixed order.
train_loader_mnist = torch.utils.data.DataLoader(train_dataset_mnist, batch_size=batch_size, shuffle=True)
test_loader_mnist = torch.utils.data.DataLoader(test_dataset_mnist, batch_size=batch_size, shuffle=False)
train_loader_cifar = torch.utils.data.DataLoader(train_dataset_cifar, batch_size=batch_size, shuffle=True)
test_loader_cifar = torch.utils.data.DataLoader(test_dataset_cifar, batch_size=batch_size, shuffle=False)

# Cell 3: Define CNN Architectures

Files already downloaded and verified
Files already downloaded and verified


In [None]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN: two conv + max-pool stages followed by three linear layers.

    The first linear layer infers its input width from the first batch it sees, so the
    network accepts any input resolution that survives the two conv/pool stages.

    Args:
        in_channels: Number of channels in the input images.

    Note:
        ``fc1`` is an ``nn.LazyLinear``. Its parameters are registered at construction
        time, so ``model.parameters()`` already contains them when an optimizer is built
        before the first forward pass. (Creating the layer inside ``forward`` left it
        out of any optimizer constructed earlier, so it was never trained.) The
        parameters have no shape until the first forward pass, so count them with
        ``numel()`` only after one batch has been run through the model.
    """

    def __init__(self, in_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # in_features is inferred from the flattened feature map on the first forward pass.
        self.fc1 = nn.LazyLinear(120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images ``x``."""
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        x = torch.flatten(x, start_dim=1)  # keep the batch dimension, flatten the rest

        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x



In [None]:
class ResNet(nn.Module):
    """ResNet-18 wrapper with a replaced input convolution and classification head.

    Args:
        num_classes: Number of output logits produced by the final linear layer.
        in_channels: Number of channels in the input images.
    """

    def __init__(self, num_classes=10, in_channels=3):
        super().__init__()
        # Called without arguments so the network is randomly initialised on both old
        # and new torchvision releases, without passing the deprecated `pretrained` flag.
        self.model = torchvision.models.resnet18()
        # Swap the stem for a 3x3, stride-1 convolution that accepts `in_channels` inputs.
        self.model.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1, bias=False)
        # Replace the classifier so it emits `num_classes` logits.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

    def forward(self, x):
        """Return the class logits computed by the wrapped ResNet-18."""
        return self.model(x)


In [None]:
def _evaluate_accuracy(model, data_loader):
    """Return the percentage of samples in ``data_loader`` that ``model`` classifies correctly."""
    was_training = model.training
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    model.train(was_training)  # restore whatever mode the caller had set
    return 100 * correct / total if total else 0.0


def train_model(model, train_loader, test_loader, epochs=5):
    """Train ``model`` with Adam (lr=0.001) and cross-entropy loss.

    Args:
        model: Network to train; it is moved to the module-level ``device``.
        train_loader: Iterable of ``(images, labels)`` training batches.
        test_loader: Iterable of ``(images, labels)`` held-out batches. When not
            ``None``, accuracy on it is printed after training; that evaluation is
            excluded from the reported training time.
        epochs: Number of passes over ``train_loader``.

    Returns:
        ``(train_acc, seconds)``: the training accuracy (percent) of the last epoch,
        or ``0.0`` if no epoch ran or no samples were seen, and the wall-clock
        training time in seconds.
    """
    model.to(device)

    # Dry run on a single batch before the optimizer is built, so that layers which are
    # created or materialised on the first forward pass exist in model.parameters().
    # eval mode + no_grad keeps the dry run from updating weights or running statistics.
    warmup_batch = next(iter(train_loader), None)
    if warmup_batch is not None:
        was_training = model.training
        model.eval()
        with torch.no_grad():
            model(warmup_batch[0].to(device))
        model.train(was_training)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    start_time = time.time()
    train_acc = 0.0  # defined even when epochs == 0

    for epoch in range(epochs):
        model.train()
        correct, total, running_loss = 0, 0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # running_loss is the sum of per-batch mean losses, not a per-sample average.
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_acc = 100 * correct / total if total else 0.0
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss:.4f}, Accuracy: {train_acc:.2f}%")

    end_time = time.time()

    if test_loader is not None:
        test_acc = _evaluate_accuracy(model, test_loader)
        print(f"Test Accuracy: {test_acc:.2f}%")

    return train_acc, end_time - start_time

In [None]:
# LeNet-5 on MNIST: train for 5 epochs, then keep the returned (training) accuracy,
# the training time and the parameter count for the summary table.
print("Training LeNet-5 on MNIST")
model_mnist = LeNet5(in_channels=1)
accuracy_mnist, train_time_mnist = train_model(
    model=model_mnist,
    train_loader=train_loader_mnist,
    test_loader=test_loader_mnist,
    epochs=5,
)
num_params_mnist = sum(param.numel() for param in model_mnist.parameters())

Training LeNet-5 on MNIST
Epoch [1/5], Loss: 238.9399, Accuracy: 84.50%
Epoch [2/5], Loss: 66.8160, Accuracy: 95.64%
Epoch [3/5], Loss: 49.1241, Accuracy: 96.80%
Epoch [4/5], Loss: 41.0682, Accuracy: 97.30%
Epoch [5/5], Loss: 35.7002, Accuracy: 97.60%


In [None]:
# LeNet-5 on CIFAR-10: same procedure as the MNIST run, with 3 input channels.
print("Training LeNet-5 on CIFAR-10")
model_cifar = LeNet5(in_channels=3)
accuracy_cifar, train_time_cifar = train_model(
    model=model_cifar,
    train_loader=train_loader_cifar,
    test_loader=test_loader_cifar,
    epochs=5,
)
num_params_cifar = sum(param.numel() for param in model_cifar.parameters())

Training LeNet-5 on CIFAR-10
Epoch [1/5], Loss: 708.4304, Accuracy: 33.86%
Epoch [2/5], Loss: 596.9119, Accuracy: 44.69%
Epoch [3/5], Loss: 563.7840, Accuracy: 48.00%
Epoch [4/5], Loss: 543.1040, Accuracy: 50.15%
Epoch [5/5], Loss: 529.8309, Accuracy: 51.42%


In [None]:
# ResNet on MNIST: single-channel input, 5 epochs; record accuracy, time and size.
print("Training ResNet on MNIST")
model_resnet_mnist = ResNet(in_channels=1)
accuracy_resnet_mnist, train_time_resnet_mnist = train_model(
    model=model_resnet_mnist,
    train_loader=train_loader_mnist,
    test_loader=test_loader_mnist,
    epochs=5,
)
num_params_resnet_mnist = sum(param.numel() for param in model_resnet_mnist.parameters())

Training ResNet on MNIST




Epoch [1/5], Loss: 47.8880, Accuracy: 96.86%
Epoch [2/5], Loss: 19.5294, Accuracy: 98.77%
Epoch [3/5], Loss: 14.7873, Accuracy: 99.06%
Epoch [4/5], Loss: 10.8500, Accuracy: 99.28%
Epoch [5/5], Loss: 10.5339, Accuracy: 99.31%


In [None]:
# ResNet on CIFAR-10: three-channel input, 5 epochs; record accuracy, time and size.
print("Training ResNet on CIFAR-10")
model_resnet_cifar = ResNet(in_channels=3)
accuracy_resnet_cifar, train_time_resnet_cifar = train_model(
    model=model_resnet_cifar,
    train_loader=train_loader_cifar,
    test_loader=test_loader_cifar,
    epochs=5,
)
num_params_resnet_cifar = sum(param.numel() for param in model_resnet_cifar.parameters())


Training ResNet on CIFAR-10
Epoch [1/5], Loss: 448.4437, Accuracy: 58.94%
Epoch [2/5], Loss: 280.1619, Accuracy: 74.81%
Epoch [3/5], Loss: 210.3057, Accuracy: 81.40%
Epoch [4/5], Loss: 164.7681, Accuracy: 85.36%
Epoch [5/5], Loss: 119.2963, Accuracy: 89.43%


In [None]:
from torchvision import transforms

# Alternative MNIST preprocessing: resize to 32x32, convert to a tensor, then normalise
# with mean 0.1307 and std 0.3081.
# NOTE(review): rebinding this name does not change the transform already attached to
# train_dataset_mnist / test_dataset_mnist, and no later visible cell reads it --
# confirm whether the MNIST datasets and loaders were meant to be rebuilt with it.
transform_mnist = transforms.Compose([
    transforms.Resize(size=(32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style CNN with 3x3 convolutions and a 3x3 pooled feature map.

    Five convolution layers (three of them followed by 2x2 max-pooling) feed a
    three-layer classifier with dropout. An adaptive average pool fixes the feature
    map at 3x3 before flattening, so the classifier no longer depends on the input
    resolution; for 28x28 inputs the feature map is already 3x3 and the pool is a no-op.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output logits.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Parameter-free; guarantees the 256 * 3 * 3 width the classifier expects.
        self.avgpool = nn.AdaptiveAvgPool2d((3, 3))

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 3 * 3, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


In [None]:
import torch
import torch.nn as nn

class AlexNet(nn.Module):
    """AlexNet-style CNN with 3x3 convolutions and a 4x4 pooled feature map.

    Five convolution layers (three of them followed by 2x2 max-pooling) feed a
    three-layer classifier with dropout. An adaptive average pool fixes the feature
    map at 4x4 before flattening, so the classifier no longer depends on the input
    resolution; for 32x32 inputs the feature map is already 4x4 and the pool is a no-op.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output logits.
    """

    def __init__(self, in_channels=3, num_classes=10):
        super().__init__()

        # The spatial sizes in the comments below assume a 32x32 input.
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output size: 16x16

            nn.Conv2d(64, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Output size: 8x8

            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # Output size: 4x4
        )

        # Parameter-free; guarantees the 4x4 map that fc_input_dim assumes.
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))
        self.fc_input_dim = 256 * 4 * 4

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(self.fc_input_dim, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


# Three-channel, ten-class AlexNet instance.
# NOTE(review): no other visible cell reads model_cifar10 -- confirm it is still needed.
model_cifar10 = AlexNet(3, 10)


In [None]:
# AlexNet on MNIST: single-channel input, 5 epochs; record accuracy, time and size.
print("Training AlexNet on MNIST")
model_alexnet_mnist = AlexNet(in_channels=1)
accuracy_alexnet_mnist, train_time_alexnet_mnist = train_model(
    model=model_alexnet_mnist,
    train_loader=train_loader_mnist,
    test_loader=test_loader_mnist,
    epochs=5,
)
num_params_alexnet_mnist = sum(param.numel() for param in model_alexnet_mnist.parameters())


Training AlexNet on MNIST
Epoch [1/5], Loss: 181.2995, Accuracy: 86.60%
Epoch [2/5], Loss: 32.2469, Accuracy: 98.11%
Epoch [3/5], Loss: 24.1084, Accuracy: 98.54%
Epoch [4/5], Loss: 21.1316, Accuracy: 98.81%
Epoch [5/5], Loss: 19.3606, Accuracy: 98.89%


In [None]:

# AlexNet on CIFAR-10: three-channel input, 5 epochs; record accuracy, time and size.
print("Training AlexNet on CIFAR-10")
model_alexnet_cifar = AlexNet(in_channels=3)
accuracy_alexnet_cifar, train_time_alexnet_cifar = train_model(
    model=model_alexnet_cifar,
    train_loader=train_loader_cifar,
    test_loader=test_loader_cifar,
    epochs=5,
)
num_params_alexnet_cifar = sum(param.numel() for param in model_alexnet_cifar.parameters())

Training AlexNet on CIFAR-10
Epoch [1/5], Loss: 639.4195, Accuracy: 38.23%
Epoch [2/5], Loss: 461.7458, Accuracy: 57.18%
Epoch [3/5], Loss: 379.3506, Accuracy: 65.80%
Epoch [4/5], Loss: 323.9928, Accuracy: 71.05%
Epoch [5/5], Loss: 287.6544, Accuracy: 74.60%


In [None]:
import pandas as pd


In [None]:
import pandas as pd

# One (dataset, model, accuracy, parameter count, training time) record per training run.
results = [
    ("MNIST", "LeNet-5", accuracy_mnist, num_params_mnist, train_time_mnist),
    ("CIFAR-10", "LeNet-5", accuracy_cifar, num_params_cifar, train_time_cifar),
    ("MNIST", "ResNet", accuracy_resnet_mnist, num_params_resnet_mnist, train_time_resnet_mnist),
    ("CIFAR-10", "ResNet", accuracy_resnet_cifar, num_params_resnet_cifar, train_time_resnet_cifar),
    ("MNIST", "AlexNet", accuracy_alexnet_mnist, num_params_alexnet_mnist, train_time_alexnet_mnist),
    ("CIFAR-10", "AlexNet", accuracy_alexnet_cifar, num_params_alexnet_cifar, train_time_alexnet_cifar),
]

# Tabulate the records and show the comparison across all six runs.
result_df = pd.DataFrame(
    data=results,
    columns=["Dataset", "Model", "Accuracy", "Parameters", "Training Time (s)"],
)
print(result_df)


    Dataset    Model   Accuracy  Parameters  Training Time (s)
0     MNIST  LeNet-5  97.601667       61706          64.002037
1  CIFAR-10  LeNet-5  51.422000       83126          62.399046
2     MNIST   ResNet  99.315000    11172810         144.525892
3  CIFAR-10   ResNet  89.432000    11173962         130.485182
4     MNIST  AlexNet  98.895000    28513994         125.151195
5  CIFAR-10  AlexNet  74.600000    35855178         119.748351


In [None]:

# The two AlexNet runs only, in the same record layout as the full results table.
results_alexnet = [
    ("MNIST", "AlexNet", accuracy_alexnet_mnist, num_params_alexnet_mnist, train_time_alexnet_mnist),
    ("CIFAR-10", "AlexNet", accuracy_alexnet_cifar, num_params_alexnet_cifar, train_time_alexnet_cifar),
]

result_df_alexnet = pd.DataFrame(
    data=results_alexnet,
    columns=["Dataset", "Model", "Accuracy", "Parameters", "Training Time (s)"],
)
print(result_df_alexnet)


    Dataset    Model  Accuracy  Parameters  Training Time (s)
0     MNIST  AlexNet    98.895    28513994         125.151195
1  CIFAR-10  AlexNet    74.600    35855178         119.748351


1)  LeNet-5 (MNIST): 61,706 parameters

LeNet-5 (CIFAR-10): 83,126 parameters

ResNet (MNIST): 11,172,810 parameters

ResNet (CIFAR-10): 11,173,962 parameters

AlexNet (MNIST): 28,513,994 parameters

AlexNet (CIFAR-10): 35,855,178 parameters

2)
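As the number of parameters increases, accuracy tends to improve, especially on more complex datasets like CIFAR-10. The trend is not strictly monotonic, though: ResNet (about 11.2M parameters) outperforms AlexNet (about 28.5M–35.9M parameters) on both datasets, so architecture matters as well as model size.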

3)
CIFAR-10 is a more complex dataset than MNIST due to its 3 color channels (RGB) and more varied image categories, compared to the simple, grayscale MNIST dataset. This increased complexity makes it more challenging for models like LeNet-5 to achieve high accuracy, resulting in lower accuracy on CIFAR-10.

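With 3 channels (RGB), the model has to process more information per image, which increases the work done per image. In the runs above, however, total training time on CIFAR-10 was slightly lower than on MNIST for every model (for example, 62.4 s vs. 64.0 s for LeNet-5), most likely because the CIFAR-10 training set contains fewer images (50,000 vs. 60,000); the differences aren’t extreme.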

4)
More Parameters = Higher Accuracy: Larger models (ResNet, AlexNet)  achieved higher accuracy on both datasets. However, they come with a trade-off in terms of computation.

Higher Accuracy = More Training Time: As the complexity of the model increases, so does the time required to train it.