# Task 1

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable across Restart & Run All (GPU kernels may still introduce
# small run-to-run differences).
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'{device}')

# Convert each image to a tensor, then standardise it with the given
# mean / std (0.1307 / 0.3081 are the commonly quoted MNIST statistics).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# download=True only fetches the files when they are missing under ./data,
# so the notebook also runs on a machine that has never downloaded MNIST.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Human-readable labels for the ten digit classes: '0' ... '9'.
class_names = [str(i) for i in range(10)]


cuda


In [2]:
class BaseCNN(nn.Module):
    """Small convolutional classifier: three conv stages plus a two-layer head.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and a 2x2
    max-pool with stride 2; the channel width grows 1 -> 32 -> 64 -> 128.
    The head flattens the feature map and maps 128 * 3 * 3 features to 10
    outputs through a 128-unit hidden layer. That flatten size corresponds
    to 28x28 inputs (28 -> 14 -> 7 -> 3 after the three pools).
    """

    def __init__(self):
        super().__init__()

        # Feature extractor; the convolutions sit at Sequential indices 0, 3, 6.
        feature_stack = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]
        self.conv_layer = nn.Sequential(*feature_stack)

        # Classifier head; the linear layers sit at Sequential indices 1 and 3.
        head_stack = [
            nn.Flatten(),
            nn.Linear(128 * 3 * 3, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        ]
        self.fc_layer = nn.Sequential(*head_stack)

    def forward(self, x):
        """Return the 10 raw class scores for batch ``x`` (no softmax is applied)."""
        features = self.conv_layer(x)
        return self.fc_layer(features)


In [4]:
# Loss shared by every training run; it is called with the model's raw
# class scores and the integer class labels.
criterion = nn.CrossEntropyLoss()

# Optimizer classes to compare, keyed by the label printed in the training log.
optimizers = dict(SGD=optim.SGD, Adam=optim.Adam, RMSprop=optim.RMSprop)

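# train a freshly initialised BaseCNN with the given optimizer class and print the mean training loss after every epoch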
def train_model(optimizer_name, optimizer_class, num_epochs=10, lr=0.001):
    """Train a freshly initialised ``BaseCNN`` on ``train_loader`` and return it.

    Args:
        optimizer_name: Label for the optimizer. It is not read inside this
            function; it is kept so existing calls keep working.
        optimizer_class: Optimizer constructor, invoked as
            ``optimizer_class(model.parameters(), lr=lr)``.
        num_epochs: Number of full passes over ``train_loader``.
        lr: Learning rate handed to the optimizer; defaults to 0.001.

    Returns:
        The trained model, still on ``device``, so it can be evaluated or
        saved afterwards instead of being thrown away.
    """
    model = BaseCNN().to(device)  # new weights on every call, so runs never share state
    optimizer = optimizer_class(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()  # clear gradients left over from the previous batch
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # Report the mean of the per-batch losses seen during this epoch.
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}')

    return model

# Run one independent training session per entry in `optimizers`.
# The loop variables keep their names because they stay defined at notebook
# level after the loop has finished.
for optimizer_name in optimizers:
    optimizer_class = optimizers[optimizer_name]
    print(f'Training with {optimizer_name} optimizer')
    train_model(optimizer_name, optimizer_class)

Training with SGD optimizer
Epoch [1/10], Loss: 2.283031440747064
Epoch [2/10], Loss: 2.2168542881255973
Epoch [3/10], Loss: 1.8826503279620905
Epoch [4/10], Loss: 0.8610656568681253
Epoch [5/10], Loss: 0.47344491700691455
Epoch [6/10], Loss: 0.35426543594232757
Epoch [7/10], Loss: 0.29161223046370405
Epoch [8/10], Loss: 0.2518787252138863
Epoch [9/10], Loss: 0.22157782330902528
Epoch [10/10], Loss: 0.1981378698955848
Training with Adam optimizer
Epoch [1/10], Loss: 0.1458149415341321
Epoch [2/10], Loss: 0.041797294096002524
Epoch [3/10], Loss: 0.030017422189293336
Epoch [4/10], Loss: 0.02328468844417293
Epoch [5/10], Loss: 0.018765133913005837
Epoch [6/10], Loss: 0.014727911020902523
Epoch [7/10], Loss: 0.012896599928042434
Epoch [8/10], Loss: 0.011153208784938978
Epoch [9/10], Loss: 0.010025222429457678
Epoch [10/10], Loss: 0.009053865006825403
Training with RMSprop optimizer
Epoch [1/10], Loss: 0.14458926743281675
Epoch [2/10], Loss: 0.04043601254299869
Epoch [3/10], Loss: 0.0285223

# Task 2

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Attention(nn.Module):
    """Element-wise gate computed from the feature map itself.

    Two 1x1 convolutions form a bottleneck (``in_channels`` -> hidden ->
    ``in_channels``) with a ReLU in between. A sigmoid turns the result into
    weights between 0 and 1, which are multiplied with the input.

    Args:
        in_channels: Number of channels of the incoming feature map.
        reduction: Bottleneck ratio. The hidden width is
            ``in_channels // reduction`` but never less than 1, so inputs
            with fewer than ``reduction`` channels still get a usable
            bottleneck instead of a zero-channel convolution.

    Raises:
        ValueError: If ``reduction`` is smaller than 1.
    """

    def __init__(self, in_channels, reduction=8):
        super(Attention, self).__init__()
        if reduction < 1:
            raise ValueError(f'reduction must be >= 1, got {reduction}')
        # Clamp to one channel: plain floor division yields 0 for narrow inputs.
        hidden_channels = max(in_channels // reduction, 1)
        self.conv1 = nn.Conv2d(in_channels, hidden_channels, 1)
        self.conv2 = nn.Conv2d(hidden_channels, in_channels, 1)

    def forward(self, x):
        """Return ``x`` scaled element-wise by its sigmoid gate (same shape as ``x``)."""
        attention = F.relu(self.conv1(x))
        attention = torch.sigmoid(self.conv2(attention))
        return x * attention

In [6]:
class BaseCNNWithAttention(nn.Module):
    """Three-stage CNN classifier with an ``Attention`` gate before the head.

    The convolutional stack (3x3 conv with padding 1, ReLU, 2x2 max-pool;
    channels 1 -> 32 -> 64 -> 128) produces a 128-channel feature map, which
    is passed through ``Attention(128)`` and then through a flatten +
    two-layer fully connected head that emits 10 outputs. The
    ``128 * 3 * 3`` flatten size corresponds to 28x28 inputs
    (28 -> 14 -> 7 -> 3 after the three pools).
    """

    def __init__(self):
        super().__init__()

        # Feature extractor; the convolutions sit at Sequential indices 0, 3, 6.
        feature_stack = [
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]
        self.conv_layer = nn.Sequential(*feature_stack)

        # Gate applied to the 128-channel output of the last conv stage.
        self.attention = Attention(128)

        # Classifier head; the linear layers sit at Sequential indices 1 and 3.
        head_stack = [
            nn.Flatten(),
            nn.Linear(128 * 3 * 3, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        ]
        self.fc_layer = nn.Sequential(*head_stack)

    def forward(self, x):
        """Return the 10 raw class scores for batch ``x`` (no softmax is applied)."""
        gated = self.attention(self.conv_layer(x))
        return self.fc_layer(gated)

In [7]:

def train_model(optimizer_name, optimizer_class, num_epochs=10, lr=0.001):
    """Train a freshly initialised ``BaseCNNWithAttention`` and return it.

    NOTE(review): this reuses the name of the earlier BaseCNN training
    function, so once this cell has run the earlier version is shadowed.

    Args:
        optimizer_name: Label for the optimizer. It is not read inside this
            function; it is kept so existing calls keep working.
        optimizer_class: Optimizer constructor, invoked as
            ``optimizer_class(model.parameters(), lr=lr)``.
        num_epochs: Number of full passes over ``train_loader``.
        lr: Learning rate handed to the optimizer; defaults to 0.001.

    Returns:
        The trained model, still on ``device``, so it can be evaluated or
        saved afterwards instead of being thrown away.
    """
    model = BaseCNNWithAttention().to(device)  # the variant with the attention block
    optimizer = optimizer_class(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()  # clear gradients left over from the previous batch
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # Report the mean of the per-batch losses seen during this epoch.
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}')

    return model


# Name the optimizer explicitly instead of reading `optimizer_name`, which is
# only a loop variable left behind by an earlier cell (and would not say Adam).
# Assigning the result keeps the trained model and stops the notebook from
# echoing its repr as cell output.
attention_model = train_model('Adam', optim.Adam)

Epoch [1/10], Loss: 0.1556467492801588
Epoch [2/10], Loss: 0.04428254471199938
Epoch [3/10], Loss: 0.032054501649997964
Epoch [4/10], Loss: 0.024812270265264582
Epoch [5/10], Loss: 0.01923090498583014
Epoch [6/10], Loss: 0.016725794168893245
Epoch [7/10], Loss: 0.013315916987488045
Epoch [8/10], Loss: 0.012251585158132825
Epoch [9/10], Loss: 0.009430731267675952
Epoch [10/10], Loss: 0.009637673136558757
