In [1]:
import torch

ModuleNotFoundError: No module named 'torch'

In [None]:
from torchvision import datasets, transforms

In [None]:
# Shared preprocessing: convert each image to a tensor, then normalize its
# single channel with mean 0.5 and std 0.5.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Both MNIST splits share the storage root, the download flag and the preprocessing.
mnist_options = dict(root='data', download=True, transform=transform)
train_dataset = datasets.MNIST(train=True, **mnist_options)
test_dataset = datasets.MNIST(train=False, **mnist_options)

# Only the training batches are shuffled; the test split keeps a fixed order.
mnist_batch_size = 64
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=mnist_batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=mnist_batch_size, shuffle=False)

In [None]:
from PIL import Image
import os

def ProjectDataLoader(image_dir='/content', image_transform=None):
    """Load labelled digit PNGs from a directory into stacked tensors.

    Each file must be named ``<label>-<anything>.png``; the text before the
    first ``-`` is parsed as the integer class label.

    Args:
        image_dir: Directory scanned (non-recursively) for PNG files.
            Defaults to ``'/content'``.
        image_transform: Callable applied to each grayscale PIL image to
            produce a tensor. Defaults to the notebook-level ``transform``.

    Returns:
        A tuple ``(images_tensor, labels_tensor)``: the transformed images
        stacked along a new first dimension, and a ``torch.long`` tensor with
        the matching labels in the same order.

    Raises:
        ValueError: If ``image_dir`` contains no PNG files, or if a file name
            does not start with an integer label.
    """
    # Case-insensitive match, so `.PNG` files are accepted as well.
    # Sorted because os.listdir returns entries in arbitrary order.
    png_files = sorted(f for f in os.listdir(image_dir) if f.lower().endswith('.png'))
    if not png_files:
        # Fail here with a clear message instead of inside torch.stack below.
        raise ValueError(f"No .png files found in {image_dir!r}")

    if image_transform is None:
        image_transform = transform

    image_tensors = []
    labels = []
    for fname in png_files:
        label_str = fname.split('-')[0]
        try:
            label = int(label_str)
        except ValueError as exc:
            raise ValueError(
                f"Cannot parse an integer label from file name {fname!r}; "
                "expected '<label>-<anything>.png'"
            ) from exc

        # The context manager closes the underlying file once the grayscale
        # copy has been transformed.
        with Image.open(os.path.join(image_dir, fname)) as img:
            tensor_img = image_transform(img.convert('L'))

        image_tensors.append(tensor_img)
        labels.append(label)

    # torch.stack needs every transformed image to have the same shape.
    # NOTE(review): assumes the PNGs are already sized for the models (28x28) - confirm.
    images_tensor = torch.stack(image_tensors)
    labels_tensor = torch.tensor(labels, dtype=torch.long)
    return images_tensor, labels_tensor

# Load the custom digit images once; the evaluation cells further down reuse
# `custom_images` and `custom_labels` for every model.
custom_images, custom_labels = ProjectDataLoader()
print(f"Loaded {custom_images.shape[0]} custom images. Label tensor shape: {custom_labels.shape}")


In [None]:
import torch.nn as nn

In [None]:
class DigitMLP(nn.Module):
    """Fully connected classifier: 784 -> 128 -> 64 -> 10 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        layer_widths = [28 * 28, 128, 64]
        # Position 0 flattens each input; every hidden Linear is followed by a ReLU.
        stack = [nn.Flatten()]
        for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # Final projection to the 10 class logits, with no activation.
        stack.append(nn.Linear(layer_widths[-1], 10))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw class logits for the batch `x`."""
        logits = self.model(x)
        return logits

# Training with Adam Optimizer

In [None]:
# Prefer the GPU when one is available, otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = DigitMLP()
# Kept as the cell's final expression so the notebook still displays the module.
model.to(device)

In [None]:
# Adam over all of the model's parameters, paired with cross-entropy on the logits.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [None]:
import matplotlib.pyplot as plt

In [None]:
epochs = 10
train_losses = []
for epoch in range(1, epochs + 1):
    # ---- one optimisation pass over the training batches ----
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Mean of the per-batch losses for this epoch.
    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)

    # ---- accuracy on the MNIST test batches, gradients disabled ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            predicted = model(images).argmax(dim=1)
            correct += int((predicted == labels).sum())
            total += labels.size(0)
    accuracy = 100 * correct / total

    print(f"Epoch {epoch}/{epochs} - Training Loss: {avg_loss:.4f} - MNIST Test Accuracy: {accuracy:.2f}%")

In [None]:
# Loss curve for the run above, one marker per epoch.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(range(1, epochs + 1), train_losses, marker='o')
ax.set_title('Training Loss Over Epochs')
ax.set_xlabel('Epoch')
ax.set_ylabel('Average Training Loss')
ax.grid(True)
plt.show()

In [None]:
# Final evaluation of the trained model: MNIST test set first, then the custom digits.
model.eval()
mnist_correct = 0
# Inference only: disable autograd so no graph is built for the test batches.
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        mnist_correct += (predicted == labels).sum().item()
mnist_total = len(test_dataset)
mnist_accuracy = 100 * mnist_correct / mnist_total
print(f"Final MNIST test accuracy: {mnist_accuracy:.2f}% ({mnist_correct}/{mnist_total} correct)")

# The custom digits are scored in a single forward pass over the whole stacked tensor.
custom_images = custom_images.to(device)
custom_labels = custom_labels.to(device)
with torch.no_grad():
    custom_outputs = model(custom_images)
    _, custom_preds = torch.max(custom_outputs, 1)
    custom_correct = (custom_preds == custom_labels).sum().item()
custom_total = custom_labels.size(0)
custom_accuracy = 100 * custom_correct / custom_total
print(f"Accuracy on custom digits: {custom_accuracy:.2f}%  ({custom_correct}/{custom_total} correct)")

# Improvement #1 : Expand MLP Model

In [None]:
import torch.nn as nn

class ExpandedDigitMLP(nn.Module):
    """Deeper fully connected classifier: 784 -> 256 -> 128 -> 64 -> 10 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        layer_widths = [28 * 28, 256, 128, 64]
        # Position 0 flattens each input; every hidden Linear is followed by a ReLU.
        stack = [nn.Flatten()]
        for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # Final projection to the 10 class logits, with no activation.
        stack.append(nn.Linear(layer_widths[-1], 10))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw class logits for the batch `x`."""
        logits = self.model(x)
        return logits

In [None]:
# Re-select the device and move a freshly initialised expanded model onto it.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = ExpandedDigitMLP().to(device)

In [None]:
# Fresh Adam optimizer bound to the new model's parameters, with cross-entropy loss.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [None]:
epochs = 10
train_losses = []
for epoch in range(1, epochs + 1):
    # ---- one optimisation pass over the training batches ----
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Mean of the per-batch losses for this epoch.
    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)

    # ---- accuracy on the MNIST test batches, gradients disabled ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            predicted = model(images).argmax(dim=1)
            correct += int((predicted == labels).sum())
            total += labels.size(0)
    accuracy = 100 * correct / total

    print(f"Epoch {epoch}/{epochs} - Training Loss: {avg_loss:.4f} - MNIST Test Accuracy: {accuracy:.2f}%")

In [None]:
# Loss curve for the expanded model's run, one marker per epoch.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(range(1, epochs + 1), train_losses, marker='o')
ax.set_title('Training Loss Over Epochs')
ax.set_xlabel('Epoch')
ax.set_ylabel('Average Training Loss')
ax.grid(True)
plt.show()

In [None]:
# Final evaluation of the trained model: MNIST test set first, then the custom digits.
model.eval()
mnist_correct = 0
# Inference only: disable autograd so no graph is built for the test batches.
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        mnist_correct += (predicted == labels).sum().item()
mnist_total = len(test_dataset)
mnist_accuracy = 100 * mnist_correct / mnist_total
print(f"Final MNIST test accuracy: {mnist_accuracy:.2f}% ({mnist_correct}/{mnist_total} correct)")

# The custom digits are scored in a single forward pass over the whole stacked tensor.
custom_images = custom_images.to(device)
custom_labels = custom_labels.to(device)
with torch.no_grad():
    custom_outputs = model(custom_images)
    _, custom_preds = torch.max(custom_outputs, 1)
    custom_correct = (custom_preds == custom_labels).sum().item()
custom_total = custom_labels.size(0)
custom_accuracy = 100 * custom_correct / custom_total
print(f"Accuracy on custom digits: {custom_accuracy:.2f}%  ({custom_correct}/{custom_total} correct)")

# Improvement #2 : Expand MLP Model Again and Add Dropout

In [None]:
import torch.nn as nn

class ExpandedDigitMLP(nn.Module):
    """Wider classifier: 784 -> 1024 -> 512 -> 256 -> 10, with dropout after the first two hidden layers."""

    def __init__(self):
        super().__init__()
        # (inputs, outputs, dropout probability or None) for each hidden stage.
        hidden_stages = [
            (28 * 28, 1024, 0.3),
            (1024, 512, 0.2),
            (512, 256, None),
        ]
        stack = [nn.Flatten()]
        for fan_in, fan_out, drop_p in hidden_stages:
            stack.extend([nn.Linear(fan_in, fan_out), nn.ReLU()])
            if drop_p is not None:
                stack.append(nn.Dropout(drop_p))
        # Final projection to the 10 class logits, with no activation.
        stack.append(nn.Linear(256, 10))
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw class logits for the batch `x`."""
        logits = self.model(x)
        return logits


In [None]:
# Re-select the device and move a freshly initialised expanded model onto it.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = ExpandedDigitMLP().to(device)

In [None]:
# Fresh Adam optimizer bound to the new model's parameters, with cross-entropy loss.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [None]:
epochs = 10
train_losses = []
for epoch in range(1, epochs + 1):
    # ---- one optimisation pass over the training batches ----
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Mean of the per-batch losses for this epoch.
    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)

    # ---- accuracy on the MNIST test batches, gradients disabled ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            predicted = model(images).argmax(dim=1)
            correct += int((predicted == labels).sum())
            total += labels.size(0)
    accuracy = 100 * correct / total

    print(f"Epoch {epoch}/{epochs} - Training Loss: {avg_loss:.4f} - MNIST Test Accuracy: {accuracy:.2f}%")

In [None]:
# Loss curve for the dropout model's run, one marker per epoch.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(range(1, epochs + 1), train_losses, marker='o')
ax.set_title('Training Loss Over Epochs')
ax.set_xlabel('Epoch')
ax.set_ylabel('Average Training Loss')
ax.grid(True)
plt.show()

In [None]:
# Final evaluation of the trained model: MNIST test set first, then the custom digits.
model.eval()
mnist_correct = 0
# Inference only: disable autograd so no graph is built for the test batches.
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        mnist_correct += (predicted == labels).sum().item()
mnist_total = len(test_dataset)
mnist_accuracy = 100 * mnist_correct / mnist_total
print(f"Final MNIST test accuracy: {mnist_accuracy:.2f}% ({mnist_correct}/{mnist_total} correct)")

# The custom digits are scored in a single forward pass over the whole stacked tensor.
custom_images = custom_images.to(device)
custom_labels = custom_labels.to(device)
with torch.no_grad():
    custom_outputs = model(custom_images)
    _, custom_preds = torch.max(custom_outputs, 1)
    custom_correct = (custom_preds == custom_labels).sum().item()
custom_total = custom_labels.size(0)
custom_accuracy = 100 * custom_correct / custom_total
print(f"Accuracy on custom digits: {custom_accuracy:.2f}%  ({custom_correct}/{custom_total} correct)")

# Improvement #3 : Training for More Epochs with Learning Rate Scheduler

In [None]:
# Retrain the current ExpandedDigitMLP for more epochs, halving the learning
# rate every 5 epochs with StepLR.
model = ExpandedDigitMLP().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

epochs = 15
train_losses = []
test_accuracies = []

for epoch in range(1, epochs + 1):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Read the learning rate before stepping the scheduler, so the log reports
    # the rate that was actually used for this epoch's updates rather than the
    # next epoch's.
    epoch_lr = optimizer.param_groups[0]['lr']
    scheduler.step()

    # Mean of the per-batch losses for this epoch.
    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)

    # Accuracy on the MNIST test batches, gradients disabled.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    accuracy = 100 * correct / total
    test_accuracies.append(accuracy)

    # Six decimals: after two halvings the rate is 0.00025, which four
    # decimals cannot represent.
    print(f"Epoch {epoch}/{epochs} - Loss: {avg_loss:.4f} - Test Acc: {accuracy:.2f}% - LR: {epoch_lr:.6f}")

In [None]:
# Loss curve for the scheduled run, one marker per epoch.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(range(1, epochs + 1), train_losses, marker='o')
ax.set_title('Training Loss Over Epochs')
ax.set_xlabel('Epoch')
ax.set_ylabel('Average Training Loss')
ax.grid(True)
plt.show()

In [None]:
# Final evaluation of the trained model: MNIST test set first, then the custom digits.
model.eval()
mnist_correct = 0
# Inference only: disable autograd so no graph is built for the test batches.
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        mnist_correct += (predicted == labels).sum().item()
mnist_total = len(test_dataset)
mnist_accuracy = 100 * mnist_correct / mnist_total
print(f"Final MNIST test accuracy: {mnist_accuracy:.2f}% ({mnist_correct}/{mnist_total} correct)")

# The custom digits are scored in a single forward pass over the whole stacked tensor.
custom_images = custom_images.to(device)
custom_labels = custom_labels.to(device)
with torch.no_grad():
    custom_outputs = model(custom_images)
    _, custom_preds = torch.max(custom_outputs, 1)
    custom_correct = (custom_preds == custom_labels).sum().item()
custom_total = custom_labels.size(0)
custom_accuracy = 100 * custom_correct / custom_total
print(f"Accuracy on custom digits: {custom_accuracy:.2f}%  ({custom_correct}/{custom_total} correct)")

In [None]:
# Train all models and combine their training-loss curves
#
# This cell uses the PyTorch objects defined in the cells above (`DigitMLP`,
# `ExpandedDigitMLP`, `train_dataset`, `test_loader`, `device`). A Keras-style
# `mdl.fit(X_train, y_train, ...)` call cannot run here: `nn.Module` has no
# `fit` method, and `model1`..`model4`, `X_train`, `y_train`, `X_val` and
# `y_val` are not defined in the cells above.
import matplotlib.pyplot as plt

COMPARISON_EPOCHS = 20
COMPARISON_BATCH_SIZE = 32


def _train_for_comparison(mdl, train_batches, val_batches, epochs, use_step_lr=False):
    """Train `mdl` with Adam and cross-entropy; return its mean training loss per epoch.

    Args:
        mdl: The `nn.Module` to train; it is moved to the notebook-level `device`.
        train_batches: Iterable of `(images, labels)` batches used for the updates.
        val_batches: Iterable of `(images, labels)` batches used only to report accuracy.
        epochs: Number of passes over `train_batches`.
        use_step_lr: When true, halve the learning rate every 5 epochs with StepLR.

    Returns:
        A list with one float per epoch: the mean of that epoch's per-batch losses.
    """
    mdl = mdl.to(device)
    loss_fn = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(mdl.parameters(), lr=0.001)
    sched = (
        torch.optim.lr_scheduler.StepLR(opt, step_size=5, gamma=0.5)
        if use_step_lr else None
    )

    history = []
    for epoch in range(1, epochs + 1):
        mdl.train()
        running = 0.0
        for batch_images, batch_labels in train_batches:
            batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)
            opt.zero_grad()
            batch_loss = loss_fn(mdl(batch_images), batch_labels)
            batch_loss.backward()
            opt.step()
            running += batch_loss.item()
        if sched is not None:
            sched.step()
        history.append(running / len(train_batches))

        # Held-out accuracy, gradients disabled.
        mdl.eval()
        hits, seen = 0, 0
        with torch.no_grad():
            for batch_images, batch_labels in val_batches:
                batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)
                hits += (mdl(batch_images).argmax(dim=1) == batch_labels).sum().item()
                seen += batch_labels.size(0)
        print(f"  Epoch {epoch}/{epochs} - loss: {history[-1]:.4f} - val_accuracy: {100 * hits / seen:.2f}%")
    return history


# 1) Prepare a dict to hold each model's loss curve
train_losses = {}

# 2) Define your models in a dict for easy iteration
# NOTE(review): mapping "Model 1..4" onto the four notebook sections is an
# assumption - confirm it matches the intended comparison.
models = {
    # Baseline MLP.
    "Model 1": DigitMLP(),
    # Improvement #1 architecture, rebuilt inline because the later
    # `ExpandedDigitMLP` definition replaces the first one under the same name.
    "Model 2": nn.Sequential(
        nn.Flatten(),
        nn.Linear(28 * 28, 256),
        nn.ReLU(),
        nn.Linear(256, 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 10),
    ),
    # Improvement #2: the current (wider, dropout) ExpandedDigitMLP.
    "Model 3": ExpandedDigitMLP(),
    # Improvement #3: same architecture, trained with the StepLR schedule.
    "Model 4": ExpandedDigitMLP(),
}
models_with_step_lr = {"Model 4"}

# A dedicated loader so this comparison uses its own batch size.
comparison_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=COMPARISON_BATCH_SIZE, shuffle=True
)

# 3) Train each model, record its loss history
# NOTE(review): the notebook defines no separate validation split, so the MNIST
# test loader stands in as the held-out set here - confirm this is acceptable.
for name, mdl in models.items():
    print(f"Training {name}…")
    train_losses[name] = _train_for_comparison(
        mdl,
        comparison_loader,
        test_loader,
        epochs=COMPARISON_EPOCHS,
        use_step_lr=name in models_with_step_lr,
    )

# 4) Plot all training-loss curves together
plt.figure(figsize=(10, 6))
for name, losses in train_losses.items():
    plt.plot(losses, label=name)

plt.xlabel('Epoch')
plt.ylabel('Training Loss')
plt.title('Training Loss Comparison Across 4 Models')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
