An attempt at implementing the AlexNet architecture from scratch in PyTorch

In [1]:
import torch
import torch.nn as nn

In [2]:
class local_response_normalization(nn.Module):
    """Cross-channel local response normalization (LRN) as defined in the AlexNet paper.

    Every activation is divided by ``(k + alpha * S) ** beta`` where ``S`` is the
    sum of squared activations, at the same spatial position, over the channels
    ``max(0, i - n//2) .. min(C - 1, i + n//2)`` (both ends inclusive).

    Note that, unlike ``torch.nn.LocalResponseNorm``, ``alpha`` is applied to the
    raw sum and is not divided by the window size ``n``.

    Args:
        k: additive constant inside the normalisation term.
        alpha: scale applied to the sum of squares.
        n: nominal number of adjacent channels in the window (the effective
            window is ``2 * (n // 2) + 1`` channels, truncated at the borders).
        beta: exponent applied to the normalisation term.
    """

    def __init__(self,k=2,alpha=1e-4,n=5,beta=0.75):
        super(local_response_normalization,self).__init__()
        self.k = k
        self.alpha = alpha
        self.n = n
        self.beta = beta

    def forward(self,x):
        """Normalize a 4D ``(batch, channels, height, width)`` tensor across channels.

        Returns a tensor with the same shape, dtype and device as ``x``.
        """
        _, C, _, _ = x.shape  # unpacking enforces a 4D input, as the original did
        half = self.n // 2
        squared = x * x
        # Zero-pad only the channel axis (F.pad lists pads starting from the last
        # dimension: W, H, then C) so border channels get a truncated window.
        # Being derived from x, the padded tensor lives on x's device and dtype.
        padded = nn.functional.pad(squared, (0, 0, 0, 0, half, half))
        # Sliding-window sum: output channel i adds padded channels i .. i + 2*half,
        # i.e. original channels i - half .. i + half inclusive.
        summation = padded[:, 0:C]
        for offset in range(1, 2 * half + 1):
            summation = summation + padded[:, offset:offset + C]
        denom = (self.k + self.alpha * summation) ** self.beta
        return x / denom


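Order of layers in each of the first two convolutional stages:
1. Convolution layer
2. ReLU activation
3. Local response normalization
4. Max pooling

The paper states: **"The response-normalization layers follow the first and second convolutional layers."** The remaining convolutional layers are therefore not followed by local response normalization.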

In [3]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional network.

    Five convolutional layers (the first two followed by local response
    normalization and max-pooling, the last one by max-pooling) feed three
    fully connected layers. The first linear layer takes 6*6*256 features,
    which is what the convolutional stack produces for a 3x227x227 input.

    Args:
        num_classes: size of the final output layer (defaults to 10, the number
            of CIFAR-10 classes).
    """

    def __init__(self,num_classes=10):
        super().__init__()
        # The LRN module has no learnable parameters, so one instance is reused
        # after both the first and the second convolution.
        self.lrn = local_response_normalization(k=2, alpha=1e-4, n=5, beta=0.75)

        self.features = nn.Sequential(
            # Stage 1: conv -> ReLU -> LRN -> max-pool
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            self.lrn,
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Stage 2: "same" convolution, padding = (5 - 1) / 2 = 2
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            self.lrn,
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Stages 3-5: three 3x3 "same" convolutions, no normalization in between
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Fully connected head operating on the flattened feature map.
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(in_features=6 * 6 * 256, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self,x):
        """Return the class scores (logits) for a batch of images."""
        feature_maps = self.features(x)
        # Collapse (C, H, W) into one axis: [batch_size, num_features].
        flat = feature_maps.view(feature_maps.shape[0], -1)
        return self.classifier(flat)

In [4]:
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
#from torch.optim.lr_scheduler import StepLR

In [5]:
# Preprocessing applied to every image before it is fed to the network.
preprocessing_steps = [
    # Resize to 227x227 so the convolutional stack yields the 6x6x256 feature map
    # that the first fully connected layer expects.
    transforms.Resize((227, 227)),
    # Convert the image to a tensor.
    transforms.ToTensor(),
    # Per-channel standardisation. NOTE: these are the commonly used ImageNet
    # channel statistics; they are neither taken from the AlexNet paper nor
    # computed on CIFAR-10.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

In [6]:

from torch.utils.data import Subset

# Load both CIFAR-10 splits (downloading them into ./data if necessary) with the
# shared preprocessing pipeline.
cifar_train_full = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
cifar_test_full = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Keep only the first 1000 training and first 200 test samples so that an epoch
# stays cheap.
train_dataset = Subset(cifar_train_full, range(1000))
test_dataset = Subset(cifar_test_full, range(200))

# Mini-batch loaders: training batches are reshuffled, test batches are not.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False, num_workers=2)

In [7]:
# Device-agnostic setup: use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network (CIFAR-10 has 10 classes) and move it to the chosen device
# once; a separate model.cuda() call beforehand is redundant.
model = AlexNet(num_classes=10).to(device)

loss_fn = nn.CrossEntropyLoss()  # Loss function for multi-class classification
# The optimizer is created after the move so it references the on-device parameters.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  # SGD with momentum
#scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)  # Learning rate scheduler



In [8]:

#Training step
def train(model, train_dataloader, optimizer, loss_fn, device):
    """Run one training epoch and report the average loss and accuracy.

    Args:
        model: network to optimise; it is switched to training mode.
        train_dataloader: iterable yielding ``(images, labels)`` batches.
        optimizer: optimizer that updates ``model``'s parameters.
        loss_fn: callable returning a scalar loss for ``(outputs, labels)``.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(epoch_loss, epoch_accuracy)``: the mean loss per sample and the
        accuracy in percent over all samples seen during the epoch.

    Raises:
        ZeroDivisionError: if the dataloader yields no samples.
    """
    model.train()  # Set the model to training mode
    running_loss = 0.0
    correct = 0
    total = 0
    # A loss with reduction='sum' already returns the batch total; anything else
    # is treated as a per-batch mean (the default of nn.CrossEntropyLoss).
    loss_is_batch_sum = getattr(loss_fn, "reduction", "mean") == "sum"

    for images, labels in train_dataloader:
        images, labels = images.to(device), labels.to(device)  # Move data to the target device
        optimizer.zero_grad()  # Zero the gradients

        outputs = model(images)  # Forward pass
        loss = loss_fn(outputs, labels)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        batch_size = labels.size(0)
        # Accumulate the summed loss of the batch so that the final division by
        # the sample count gives a true per-sample average, even when the last
        # batch is smaller than the others.
        if loss_is_batch_sum:
            running_loss += loss.item()
        else:
            running_loss += loss.item() * batch_size

        predicted = outputs.detach().argmax(dim=1)  # Predicted class per sample
        total += batch_size
        correct += (predicted == labels).sum().item()  # Count correct predictions

    epoch_loss = running_loss / total
    epoch_accuracy = correct / total * 100.0

    return epoch_loss, epoch_accuracy


In [9]:
#Testing step
def test(model, test_dataloader, loss_fn, device):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():  # No gradient calculation during testing
        for images, labels in test_dataloader:
            images, labels = images.to(device), labels.to(device)  # Move data to GPU if available

            outputs = model(images)  # Forward pass
            loss = loss_fn(outputs, labels)  # Compute loss

            running_loss += loss.item() # Accumulate loss
            _, predicted = torch.max(outputs.data, 1)  # Get predictions
            total += labels.size(0)
            correct += (predicted == labels).sum().item()  # Count correct predictions

    epoch_loss = running_loss / len(test_dataloader.dataset)
    epoch_accuracy = correct / total * 100.0

    return epoch_loss, epoch_accuracy

In [None]:

# Main loop: each epoch performs one optimisation pass over the training loader
# followed by one evaluation pass over the test loader, then prints a summary.
num_epochs = 10  # Number of epochs to train
for epoch in range(num_epochs):
    train_loss, train_accuracy = train(
        model=model,
        train_dataloader=train_dataloader,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
    )
    test_loss, test_accuracy = test(
        model=model,
        test_dataloader=test_dataloader,
        loss_fn=loss_fn,
        device=device,
    )

    epoch_report = f'Epoch [{epoch + 1}/{num_epochs}] || Train Loss: {train_loss:.4f} || Train Accuracy: {train_accuracy:.2f}% || Test Loss: {test_loss:.4f} || Test Accuracy: {test_accuracy:.2f}%'
    print(epoch_report)
    #scheduler.step()  # Update learning rate if using a scheduler
