# Parameter-based MLP
Using torch parameters with einops and einsum operations only
\
\
Ryan Roi Cayas \
2022-22085


In [None]:
import random

import torch

# Seed both Python's and PyTorch's global RNGs with the same value so that
# reruns of the notebook draw the same random numbers.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

## Load MNIST data

In [None]:
import torch
import torchvision
from torchvision import transforms

# Preprocessing pipeline: convert each image to a tensor, then normalize the
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Both splits share the same storage location, download flag and preprocessing.
mnist_kwargs = dict(root='./data', download=True, transform=transform)
train_dataset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)

# Mini-batch loaders: only the training split is shuffled.
batch_size = 64
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

## MLP Model using Einops and Einsum

In [None]:
import torch.nn as nn
from einops import rearrange, repeat, reduce
from torch import einsum

def fc_eins(input_size, output_size):
    """Build a fully connected layer from raw parameters and an einsum.

    Args:
        input_size: Number of input features (size of the last axis of the
            tensor passed to the returned function).
        output_size: Number of output features.

    Returns:
        A tuple ``(fc_linear, W, b)``:
        ``fc_linear(x)`` computes ``x @ W + b`` over the last axis of ``x``;
        ``W`` is an ``nn.Parameter`` of shape ``(input_size, output_size)``
        drawn from a standard normal and scaled by 0.01; ``b`` is an
        ``nn.Parameter`` of zeros with shape ``(output_size,)``.
    """
    W = nn.Parameter(torch.randn(input_size, output_size) * 0.01)  # Weight initialization
    b = nn.Parameter(torch.zeros(output_size))                     # Bias initialization

    def fc_linear(x):
        # The ellipsis keeps any leading axes untouched, so the same
        # contraction serves (batch, in) inputs as well as inputs with
        # additional leading dimensions such as (batch, seq, in).
        return torch.einsum('...j,jk->...k', x, W) + b

    return fc_linear, W, b

class MLP_eins(nn.Module):
    """Two-layer perceptron built from raw ``nn.Parameter`` tensors.

    The forward pass flattens the input with einops and then applies
    linear -> ReLU -> linear, where each linear map is an einsum contraction
    against the module's own weight and bias parameters.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of the hidden layer.
        num_classes: Number of output logits per sample.
    """

    def __init__(self, input_size=28*28, hidden_size=128, num_classes=10):
        super().__init__()

        # Assigning an nn.Parameter to a module attribute registers it, so no
        # explicit register_parameter calls are needed.  Only the tensors
        # returned by fc_eins are kept: the closure it also returns stays
        # bound to these exact Parameter objects, so a deep-copied or
        # replicated module would keep computing with the original weights,
        # and a locally defined function stored on the module cannot be
        # pickled.
        _, self.W1, self.b1 = fc_eins(input_size, hidden_size)
        _, self.W2, self.b2 = fc_eins(hidden_size, num_classes)

    def fc1_eins(self, x):
        """Apply the first fully connected layer to a 2-D ``(batch, in)`` tensor."""
        return torch.einsum('ij,jk->ik', x, self.W1) + self.b1

    def fc2_eins(self, x):
        """Apply the second fully connected layer to a 2-D ``(batch, hidden)`` tensor."""
        return torch.einsum('ij,jk->ik', x, self.W2) + self.b2

    def forward(self, x):
        """Return the logits for a 4-D input laid out as ``(b, c, h, w)``."""
        x = rearrange(x, 'b c h w -> b (c h w)')  # Flatten the input tensor
        y = self.fc1_eins(x)                      # Apply first FC layer
        y = torch.maximum(y, torch.zeros_like(y)) # Apply ReLU activation
        y = self.fc2_eins(y)                      # Apply second FC layer
        return y


# Instantiate the einsum-based MLP and push one random MNIST-shaped batch
# through it as a shape check.
model_eins = MLP_eins()
x = torch.randn(64, 1, 28, 28)
print(model_eins)
print(model_eins(x).shape)

# Total number of scalar entries across all registered parameters,
# printed with thousands separators for readability.
param_sizes = [p.numel() for p in model_eins.parameters()]
num_params = sum(param_sizes)
print(f"Number of parameters: {num_params:,}")

MLP_eins()
torch.Size([64, 10])
Number of parameters: 101,770


### Define the loss function and optimizer

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using {device} device")

# Move the model first, then build the optimizer over its parameters.
model_eins.to(device)

# Cross-entropy on the raw logits, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_eins.parameters(), lr=0.001)

Using cuda device


### Train the model

In [None]:
from tqdm import tqdm

num_epochs = 10

model_eins.train()
for epoch in tqdm(range(num_epochs)):
    total_loss = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step, then run
        # forward -> loss -> backward -> parameter update.
        optimizer.zero_grad()
        outputs = model_eins(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch: {epoch+1}, Loss: {avg_loss:.4f}")

 10%|█         | 1/10 [00:17<02:33, 17.00s/it]

Epoch: 1, Loss: 0.4203


 20%|██        | 2/10 [00:33<02:13, 16.70s/it]

Epoch: 2, Loss: 0.2048


 30%|███       | 3/10 [00:47<01:47, 15.34s/it]

Epoch: 3, Loss: 0.1476


 40%|████      | 4/10 [01:01<01:28, 14.80s/it]

Epoch: 4, Loss: 0.1184


 50%|█████     | 5/10 [01:15<01:13, 14.67s/it]

Epoch: 5, Loss: 0.1003


 60%|██████    | 6/10 [01:29<00:57, 14.47s/it]

Epoch: 6, Loss: 0.0882


 70%|███████   | 7/10 [01:43<00:42, 14.29s/it]

Epoch: 7, Loss: 0.0781


 80%|████████  | 8/10 [01:58<00:28, 14.39s/it]

Epoch: 8, Loss: 0.0730


 90%|█████████ | 9/10 [02:12<00:14, 14.30s/it]

Epoch: 9, Loss: 0.0651


100%|██████████| 10/10 [02:26<00:00, 14.62s/it]

Epoch: 10, Loss: 0.0598





### Evaluate the model

In [None]:
model_eins.eval()  # Set the model to evaluation mode

correct = 0
total = 0

# Scoring needs no gradients, so autograd tracking is switched off.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model_eins(images)
        # The predicted class is the index of the largest logit in each row.
        # argmax yields that index directly, without the legacy `.data`
        # accessor or the unused max values returned by torch.max.
        predicted = outputs.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Fraction of test samples whose predicted class matches the label.
accuracy = correct / total
print(f"Test Accuracy: {accuracy:.4f}")


Test Accuracy: 0.9716


## VERIFY: MLP Model using Torch NN

In [None]:
import torch.nn as nn

class MLP(nn.Module):
    """Reference two-layer perceptron built from ``torch.nn`` layers.

    Computes ``fc2(relu(fc1(flatten(x))))``.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of the hidden layer.
        num_classes: Number of output logits per sample.
    """

    def __init__(self, input_size=28*28, hidden_size=128, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the logits for ``x``, keeping axis 0 as the batch axis."""
        # reshape (rather than view) also accepts non-contiguous inputs such
        # as permuted tensors; it copies only when a view is impossible.
        x = x.reshape(x.size(0), -1)  # Flatten the input tensor
        y = self.fc1(x)
        y = self.relu(y)
        y = self.fc2(y)

        return y

# Instantiate the torch.nn reference MLP and push one random MNIST-shaped
# batch through it as a shape check.
model = MLP()
x = torch.randn(64, 1, 28, 28)
print(model)
print(model(x).shape)

# Total number of scalar entries across all registered parameters,
# printed with thousands separators for readability.
param_sizes = [p.numel() for p in model.parameters()]
num_params = sum(param_sizes)
print(f"Number of parameters: {num_params:,}")

MLP(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)
torch.Size([64, 10])
Number of parameters: 101,770


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU;
# move the model first, then build the optimizer over its parameters.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Cross-entropy on the raw logits, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Model Training
from tqdm import tqdm

num_epochs = 10

model.train()
for epoch in tqdm(range(num_epochs)):
    total_loss = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step, then run
        # forward -> loss -> backward -> parameter update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean of the per-batch losses for this epoch.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch: {epoch+1}, Loss: {avg_loss:.4f}")


 10%|█         | 1/10 [00:13<02:02, 13.66s/it]

Epoch: 1, Loss: 0.3834


 20%|██        | 2/10 [00:29<01:58, 14.78s/it]

Epoch: 2, Loss: 0.1913


 30%|███       | 3/10 [00:43<01:41, 14.47s/it]

Epoch: 3, Loss: 0.1354


 40%|████      | 4/10 [00:57<01:25, 14.26s/it]

Epoch: 4, Loss: 0.1089


 50%|█████     | 5/10 [01:11<01:10, 14.09s/it]

Epoch: 5, Loss: 0.0914


 60%|██████    | 6/10 [01:24<00:55, 13.89s/it]

Epoch: 6, Loss: 0.0809


 70%|███████   | 7/10 [01:37<00:41, 13.74s/it]

Epoch: 7, Loss: 0.0720


 80%|████████  | 8/10 [01:51<00:27, 13.67s/it]

Epoch: 8, Loss: 0.0627


 90%|█████████ | 9/10 [02:04<00:13, 13.58s/it]

Epoch: 9, Loss: 0.0580


100%|██████████| 10/10 [02:18<00:00, 13.85s/it]

Epoch: 10, Loss: 0.0548





In [None]:
# Model Evaluation

model.eval()  # Set the model to evaluation mode

correct = 0
total = 0

# Scoring needs no gradients, so autograd tracking is switched off.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        # The predicted class is the index of the largest logit in each row.
        # argmax yields that index directly, without the legacy `.data`
        # accessor or the unused max values returned by torch.max.
        predicted = outputs.argmax(dim=1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Fraction of test samples whose predicted class matches the label.
accuracy = correct / total
print(f"Test Accuracy: {accuracy:.4f}")


Test Accuracy: 0.9703


# The test accuracy of the MLP implemented with einops and einsum is close to that of the reference MLP built with the `torch.nn` module.

## Accuracy using einops and einsum: 97.16 %
## Accuracy using NN Module: 97.03 %