## MLP Model

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import transformers
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt
from transformers import ViTFeatureExtractor, ViTForImageClassification
# you may also import other modules if you need
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd

In [16]:
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

In [17]:
# Define the CNN model with the specified architecture
class CustomMLP(nn.Module):
    def __init__(self, num_classes):
        super(CustomMLP, self).__init__()

        # Build your MLP model by filling into nn.Sequential() or writing your own layers
        self.fc_layers = nn.Sequential(
            # Flatten layer
            nn.Flatten(),
            # Fully connected layer with 256 units, ReLU activation. You need to calculate the input dimension of this layer.
            nn.Linear(28*28,256),
            nn.ReLU(),
            # Fully connected layer with 128 units, ReLU activation.
            nn.Linear(256,128),
            nn.ReLU(),
            # Dropout layer: dropout rate of 0.5
            nn.Dropout(p=0.5),
            # Output layer with softmax activation (10 classes)
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.fc_layers(x)
        return x




In [18]:
# Load MNIST dataset
BATCH_SIZE = 64
transform = transforms.Compose([
    transforms.ToTensor()
])

# Build train dataset and dataloader
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Build testing dataset and dataloader
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)



In [19]:
# Define hyperparameters
learning_rate = 0.001
num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the CNN model
MLP_model = CustomMLP(num_classes=10).to(device)

# Define loss function and optimizer for CNN model
optimizer = optim.Adam(MLP_model.parameters(), lr=learning_rate)
# Write down the loss function
criterion = nn.CrossEntropyLoss()


In [20]:
# Lists to store training metrics
train_loss = []
train_acc = []
test_loss = []
test_acc = []

# Training loop for CNN model
for epoch in range(num_epochs):
    MLP_model.train()

    running_loss = 0.0
    running_corrects = 0
    class_corrects = np.zeros(10)
    class_totals = np.zeros(10)

    for batch_idx, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        # Training process here, which includes forward pass of the model, backpropogate of the loss, etc.
        # Remember to use the optimizer and criterion defined previously.

        outputs = MLP_model(images.view(images.size(0), -1))
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _, preds = torch.max(outputs, 1)
        running_loss += loss.item() * images.size(0)
        running_corrects += torch.sum(preds == labels.data)

        if batch_idx % 100 == 0:
            print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                epoch+1, batch_idx*len(images), len(train_loader.dataset), 100.*batch_idx /
                len(train_loader), loss.item(), float(running_corrects*100) / float(BATCH_SIZE*(batch_idx+1))))

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = running_corrects.double() / len(train_loader.dataset)

    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)


    print(f'Epoch [{epoch+1}/{num_epochs}], MLP Train Loss: {epoch_loss:.4f}, MLP Train Acc: {epoch_acc:.4f}')

    # Testing loop for MLP model
    MLP_model.eval()
    test_running_loss = 0.0
    test_running_corrects = 0

    all_preds = []
    all_labels = []

    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        # Testing process
        # You may need to calculate test_running_loss and test_running_corrects, in order to get this epoch's Testing loss and accuracy
        outputs = MLP_model(images.view(images.size(0), -1))
        loss = criterion(outputs, labels)
        preds = torch.max(outputs, 1)[1]
        test_running_loss += loss.item() * images.size(0)
        test_running_corrects += torch.sum(preds == labels.data)

        # Store for metrics later
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    test_epoch_loss = test_running_loss / len(test_loader.dataset)
    test_epoch_acc = test_running_corrects.double() / len(test_loader.dataset)

    test_loss.append(test_epoch_loss)
    test_acc.append(test_epoch_acc)

    # define any variables you may need to calculate per-class accuracy

    print(f'Testing Loss: {test_epoch_loss:.4f}, Testing Acc: {test_epoch_acc:.4f}')
    print()

# Calculate per-class precision, recall, and F1
precision = precision_score(all_labels, all_preds, average=None, zero_division=0)
recall = recall_score(all_labels, all_preds, average=None, zero_division=0)
f1 = f1_score(all_labels, all_preds, average=None, zero_division=0)
metrics_table = pd.DataFrame([precision, recall, f1], index=["Precision", "Recall", "F1-Score"])
print("Per-class Precision, Recall, and F1-Score (3×10 Table):")
print(metrics_table)


Epoch [1/10], MLP Train Loss: 0.3507, MLP Train Acc: 0.8977
Testing Loss: 0.1267, Testing Acc: 0.9618

Epoch [2/10], MLP Train Loss: 0.1361, MLP Train Acc: 0.9614
Testing Loss: 0.0943, Testing Acc: 0.9708

Epoch [3/10], MLP Train Loss: 0.0931, MLP Train Acc: 0.9729
Testing Loss: 0.0823, Testing Acc: 0.9752

Epoch [4/10], MLP Train Loss: 0.0706, MLP Train Acc: 0.9787
Testing Loss: 0.0699, Testing Acc: 0.9796

Epoch [5/10], MLP Train Loss: 0.0572, MLP Train Acc: 0.9827
Testing Loss: 0.0706, Testing Acc: 0.9804

Epoch [6/10], MLP Train Loss: 0.0476, MLP Train Acc: 0.9853
Testing Loss: 0.0713, Testing Acc: 0.9791

Epoch [7/10], MLP Train Loss: 0.0398, MLP Train Acc: 0.9874
Testing Loss: 0.0720, Testing Acc: 0.9792

Epoch [8/10], MLP Train Loss: 0.0330, MLP Train Acc: 0.9896
Testing Loss: 0.0783, Testing Acc: 0.9805

Epoch [9/10], MLP Train Loss: 0.0292, MLP Train Acc: 0.9904
Testing Loss: 0.0821, Testing Acc: 0.9798

Epoch [10/10], MLP Train Loss: 0.0259, MLP Train Acc: 0.9919
Testing Loss

## Attention Model

In [21]:
# The Vision Transformers are designed for images with multiple color channels,
# and MNIST images are grayscale with only one channel.
# To address this, we need to modify the preprocessing accordingly.
BATCH_SIZE = 64

transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(224),  # Resize images to ViT input size
    torchvision.transforms.Grayscale(num_output_channels=3),  # Convert to 3-channel grayscale
    torchvision.transforms.ToTensor(),
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)



In [22]:
# Build the ViT model
vit_model =
# You need to modify the classification head to match the number of classes in MNIST
class_names = [str(i) for i in range(10)]
vit_model.classifier =

SyntaxError: invalid syntax (3138728634.py, line 2)

In [None]:
# Freeze the backbone (ViT) weights
for param in vit_model.parameters():
    param.requires_grad = False

# Unfreeze the classification head weights
for param in vit_model.classifier.parameters():
    param.requires_grad = True

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vit_model.to(device)

# Define hyperparameters
learning_rate = 1e-4
num_epochs = 5

# Define optimizer and criterion for ViT model
vit_optimizer = torch.optim.AdamW(vit_model.classifier.parameters(), lr=learning_rate)
vit_criterion = torch.nn.CrossEntropyLoss()


In [None]:
# Lists to store training and testing metrics for attention-based model
attention_train_loss = []
attention_train_acc = []
attention_test_loss = []
attention_test_acc = []
attention_per_class_acc = np.zeros(10)

# Training loop for Attention model
for epoch in range(num_epochs):
    vit_model.train()

    running_loss = 0.0
    running_corrects = 0
    class_corrects = np.zeros(10)
    class_totals = np.zeros(10)

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Training process here
        # Similar to MLP model, we also need to implement the forward pass, backpropogation, etc.
        outputs =
        loss =

        # calculate the number of correctly classified data and loss for this batch
        running_corrects +=
        running_loss +=


        if batch_idx % 100 == 0:
            print('Epoch : {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t Accuracy:{:.3f}%'.format(
                epoch+1, batch_idx*len(inputs), len(train_loader.dataset), 100.*batch_idx /
                len(train_loader), loss.item(), float(running_corrects*100) / float(BATCH_SIZE*(batch_idx+1))))

    attention_epoch_loss = running_loss / len(train_loader.dataset)
    attention_epoch_acc = running_corrects / len(train_loader.dataset)

    attention_train_loss.append(attention_epoch_loss)
    attention_train_acc.append(attention_epoch_acc)


    print(f"Epoch [{epoch+1}/{num_epochs}], Attention Model Train Loss: {attention_epoch_loss:.4f}, Attention Train Acc: {attention_epoch_acc:.4f}")

    # Testing loop for ViT model
    vit_model.eval()
    test_running_loss = 0.0
    test_running_corrects = 0

    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        # Testing process
        # You may need to calculate test_running_loss and test_running_corrects, in order to get this epoch's testing loss and accuracy

        outputs =
        loss =

        test_running_loss +=
        test_running_corrects +=

    attention_test_epoch_loss = test_running_loss / len(test_loader.dataset)
    attention_test_epoch_acc = test_running_corrects / len(test_loader.dataset)

    attention_test_loss.append(attention_test_epoch_loss)
    attention_test_acc.append(attention_test_epoch_acc)

    # define any variables you may need to calculate per-class accuracy

    print(f'Attention Model Test Loss: {attention_test_epoch_loss:.4f}, Attention Model Test Acc: {attention_test_epoch_acc:.4f}')
    print()

print("Training complete!")

# Save the trained model
vit_model.save_pretrained("mnist_vit_model")
# Calculate per-class accuracy
attention_per_class_acc =
print('Attention Per-Class Accuracy For the Best Model:', attention_per_class_acc)