# Pre-trained AlexNet

In [1]:
# Get general purpose APIs first
import numpy as np
import matplotlib.pyplot as plt

# load the Pytorch APIs
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# 1. Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.Resize(224),  # AlexNet expects 224x224 images
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:05<00:00, 31.3MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


## Model definition

In [4]:
# 2. Load Pretrained AlexNet
model = torchvision.models.alexnet(pretrained=True)

num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_features, 10) # 10 output classes

# 3. Modify the final fully connected layer
# AlexNet was trained on ImageNet, which has 1000 classes.
# CIFAR-10 has 10 classes, so we need to change the output layer.

num_features = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_features, 10) # 10 output classes

# 4. Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [9]:
# 5. Move model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

## Training

In [10]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        # Ensure labels are long type
        labels = labels.long()
        loss = criterion(outputs, labels) # Corrected order: outputs, labels
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,  2000] loss: 1.270
[1,  4000] loss: 0.918
[1,  6000] loss: 0.765
[1,  8000] loss: 0.719
[1, 10000] loss: 0.694
[1, 12000] loss: 0.671
[2,  2000] loss: 0.534
[2,  4000] loss: 0.535
[2,  6000] loss: 0.520
[2,  8000] loss: 0.544
[2, 10000] loss: 0.515
[2, 12000] loss: 0.510
Finished Training


## Validation

In [11]:
# 7. Test the network on the test data
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f} %')

Accuracy of the network on the 10000 test images: 82.55 %


In [12]:
# 8. Class-wise accuracy
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print(f'Accuracy of {classes[i]:5s} : {100 * class_correct[i] / class_total[i]:.2f} %')

Accuracy of plane : 87.80 %
Accuracy of car   : 91.60 %
Accuracy of bird  : 83.70 %
Accuracy of cat   : 67.50 %
Accuracy of deer  : 82.90 %
Accuracy of dog   : 73.50 %
Accuracy of frog  : 71.60 %
Accuracy of horse : 86.90 %
Accuracy of ship  : 88.00 %
Accuracy of truck : 91.30 %


# Pytorch AlexNet from scratch

## Model Definition

In [21]:
class scratchAlexNet(nn.Module):
    def __init__(self, num_classes=10):
        super(scratchAlexNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(6400, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2= nn.Sequential(
            nn.Linear(4096, num_classes))

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [22]:
# 2. Load scratch AlexNet
model = scratchAlexNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# 5. Move model to GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

scratchAlexNet(
  (layer1): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer4): Sequential(
    (0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Re

## Training

In [23]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        # Ensure labels are long type
        labels = labels.long()
        loss = criterion(outputs, labels) # Corrected order: outputs, labels
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,  2000] loss: 1.958
[1,  4000] loss: 1.683
[1,  6000] loss: 1.531
[1,  8000] loss: 1.405
[1, 10000] loss: 1.308
[1, 12000] loss: 1.206
[2,  2000] loss: 1.122
[2,  4000] loss: 1.062
[2,  6000] loss: 0.999
[2,  8000] loss: 0.989
[2, 10000] loss: 0.935
[2, 12000] loss: 0.904
[3,  2000] loss: 0.841
[3,  4000] loss: 0.806
[3,  6000] loss: 0.802
[3,  8000] loss: 0.782
[3, 10000] loss: 0.773
[3, 12000] loss: 0.773
[4,  2000] loss: 0.691
[4,  4000] loss: 0.681
[4,  6000] loss: 0.676
[4,  8000] loss: 0.663
[4, 10000] loss: 0.662
[4, 12000] loss: 0.645
[5,  2000] loss: 0.591
[5,  4000] loss: 0.565
[5,  6000] loss: 0.578
[5,  8000] loss: 0.576
[5, 10000] loss: 0.586
[5, 12000] loss: 0.570
[6,  2000] loss: 0.492
[6,  4000] loss: 0.501
[6,  6000] loss: 0.494
[6,  8000] loss: 0.519
[6, 10000] loss: 0.515
[6, 12000] loss: 0.507
[7,  2000] loss: 0.436
[7,  4000] loss: 0.435
[7,  6000] loss: 0.465
[7,  8000] loss: 0.450
[7, 10000] loss: 0.440
[7, 12000] loss: 0.452
[8,  2000] loss: 0.382
[8,  4000] 

## Validation

In [24]:
# 7. Test the network on the test data
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f} %')

Accuracy of the network on the 10000 test images: 80.91 %


In [25]:
# 8. Class-wise accuracy
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print(f'Accuracy of {classes[i]:5s} : {100 * class_correct[i] / class_total[i]:.2f} %')

Accuracy of plane : 79.30 %
Accuracy of car   : 91.50 %
Accuracy of bird  : 84.00 %
Accuracy of cat   : 73.50 %
Accuracy of deer  : 78.60 %
Accuracy of dog   : 69.90 %
Accuracy of frog  : 80.00 %
Accuracy of horse : 80.80 %
Accuracy of ship  : 80.60 %
Accuracy of truck : 87.50 %


# TensorFlow-Keras Alexnet from scratch

In [55]:
# import necessary package
import tensorflow as tf

# printout versions
print(f"Tensor Flow Version: {tf.__version__}")

Tensor Flow Version: 2.18.0


## Load data form keras datasets

In [51]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10) # one-hot encoding
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

## Model definition

In [52]:
def scratch_alexnet():
    # Input layer
    input_layer = tf.keras.layers.Input(shape=(32, 32, 3))

    # Convolutional layers
    conv1 = tf.keras.layers.Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), activation='relu', padding='same')(input_layer)
    b_norm1 = tf.keras.layers.BatchNormalization()(conv1)
    pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2))(b_norm1)
    conv2 = tf.keras.layers.Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), activation='relu', padding='same')(pool1)
    b_norm2 = tf.keras.layers.BatchNormalization()(conv2)
    pool2 = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2))(b_norm2)
    conv3 = tf.keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same')(pool2)
    b_norm3 = tf.keras.layers.BatchNormalization()(conv3)
    conv4 = tf.keras.layers.Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same')(b_norm3)
    conv5 = tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), activation='relu', padding='same')(conv4)
    b_norm5 = tf.keras.layers.BatchNormalization()(conv5)
    pool5 = tf.keras.layers.MaxPool2D(pool_size=(1, 1))(b_norm5)

    # Flatten layer
    flatten = tf.keras.layers.Flatten()(pool5)

    # Fully connected layers
    fc1 = tf.keras.layers.Dense(units=4096, activation='relu')(flatten)
    fc2 = tf.keras.layers.Dense(units=4096, activation='relu')(fc1)
    output_layer = tf.keras.layers.Dense(units=10, activation='softmax')(fc2)

    # Create model
    model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
    return model

# Example usage
model = scratch_alexnet()
model.summary()


In [53]:
model.compile(
    optimizer='adam',  # We can experiment with other optimizers
    loss='categorical_crossentropy', # Let's use 'categorical_crossentropy' for > 2 classes
    metrics=['accuracy']
)

## Training and validation

In [54]:
history = model.fit(x_train, y_train, epochs=10,
                    validation_data=(x_test, y_test))

Epoch 1/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 12ms/step - accuracy: 0.3086 - loss: 1.8838 - val_accuracy: 0.3397 - val_loss: 2.0122
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 9ms/step - accuracy: 0.4402 - loss: 1.5365 - val_accuracy: 0.4637 - val_loss: 1.5552
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - accuracy: 0.5045 - loss: 1.3702 - val_accuracy: 0.5299 - val_loss: 1.3080
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 9ms/step - accuracy: 0.5509 - loss: 1.2616 - val_accuracy: 0.5478 - val_loss: 1.2672
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - accuracy: 0.5811 - loss: 1.1777 - val_accuracy: 0.5612 - val_loss: 1.2314
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.6197 - loss: 1.0886 - val_accuracy: 0.5727 - val_loss: 1.2201
Epoch 7/1