In [11]:
import torch
from torchvision import datasets, transforms, models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Device configuration: use Apple's Metal (MPS) backend when it is available,
# otherwise fall back to the CPU.
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')

# MNIST dataset: both splits are stored under ./data and converted to tensors.
# Only the training split is constructed with download=True.
to_tensor = transforms.ToTensor()
train_data = datasets.MNIST(root='data', train=True, transform=to_tensor, download=True)
test_data = datasets.MNIST(root='data', train=False, transform=to_tensor)

# Data loaders: one per split, keyed by split name, both using shuffled
# batches of 100 samples and a single worker process.
loaders = {
    split: DataLoader(dataset, batch_size=100, shuffle=True, num_workers=1)
    for split, dataset in (('train', train_data), ('test', test_data))
}

# Load ResNet18 model with randomly initialised weights.
# `weights=None` is the current spelling of the deprecated `pretrained=False`
# (the `weights` argument exists in every torchvision release that ships
# alongside a torch version providing `torch.backends.mps`, used above).
resnet18 = models.resnet18(weights=None)

# Modify the first convolutional layer to accept grayscale images
# (1 input channel instead of the stock layer's RGB input).
resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Modify the output layer to match the number of classes in MNIST (10 digits),
# reusing the input width of the layer being replaced.
resnet18.fc = nn.Linear(resnet18.fc.in_features, 10)

# Move the model to the device *before* creating the optimizer so the
# optimizer is built from the parameters that will actually be trained.
resnet18 = resnet18.to(device)

# Loss and optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet18.parameters(), lr=0.001)

# Training function
def train(num_epochs, model, loaders):
    """Train ``model`` on ``loaders['train']`` for ``num_epochs`` epochs.

    Uses the module-level ``device``, ``loss_func`` and ``optimizer``; the
    loss of every 100th batch is printed as a progress report.

    Args:
        num_epochs: Number of full passes over the training loader.
        model: Network to optimise; it is switched to training mode.
        loaders: Mapping that provides the training loader under ``'train'``.
    """
    model.train()
    train_loader = loaders['train']
    total_step = len(train_loader)

    for epoch in range(num_epochs):
        for i, batch in enumerate(train_loader):
            # Each batch is an (images, labels) pair; move both to the device.
            images, labels = (tensor.to(device) for tensor in batch)

            # Forward pass
            loss = loss_func(model(images), labels)

            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report progress only on every 100th batch.
            if (i + 1) % 100 != 0:
                continue
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{total_step}], Loss: {loss.item():.4f}')

# Testing function
def test(model, data_loader=None):
    """Evaluate ``model`` on labelled batches and print its accuracy.

    Args:
        model: Classifier to evaluate; it is switched to evaluation mode.
            The predicted class is the index of the largest value along
            dimension 1 of its output.
        data_loader: Iterable of ``(images, labels)`` batches. When omitted,
            the module-level ``loaders['test']`` is used.

    Returns:
        The accuracy as a percentage, or ``0.0`` if no samples were seen.
    """
    if data_loader is None:
        data_loader = loaders['test']

    model.eval()
    correct = 0
    total = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # `.data` is unnecessary inside no_grad(); use the tensor directly.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty loader instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    # Report the number of samples actually evaluated rather than a fixed count.
    print(f'Test Accuracy of the model on the {total} test images: {accuracy:.2f}%')
    return accuracy

# Train and evaluate the model
num_epochs = 10
train(num_epochs, resnet18, loaders)
test(resnet18)

# Predict sample images: take one batch from the test loader and compare the
# model's predictions for its first 10 images with the true labels.
sample = next(iter(loaders['test']))
imgs, lbls = sample
imgs, lbls = imgs.to(device), lbls.to(device)

# Inference only: make evaluation mode explicit (do not rely on test() having
# set it) and skip building an autograd graph.
resnet18.eval()
with torch.no_grad():
    outputs = resnet18(imgs[:10])
_, pred_y = torch.max(outputs, 1)
print(f'Prediction number: {pred_y.cpu().numpy()}')
print(f'Actual number: {lbls[:10].cpu().numpy()}')




Epoch [1/10], Step [100/600], Loss: 0.2187
Epoch [1/10], Step [200/600], Loss: 0.0853
Epoch [1/10], Step [300/600], Loss: 0.0454
Epoch [1/10], Step [400/600], Loss: 0.0087
Epoch [1/10], Step [500/600], Loss: 0.0457
Epoch [1/10], Step [600/600], Loss: 0.0075
Epoch [2/10], Step [100/600], Loss: 0.0420
Epoch [2/10], Step [200/600], Loss: 0.1426
Epoch [2/10], Step [300/600], Loss: 0.0072
Epoch [2/10], Step [400/600], Loss: 0.0808
Epoch [2/10], Step [500/600], Loss: 0.0084
Epoch [2/10], Step [600/600], Loss: 0.0259
Epoch [3/10], Step [100/600], Loss: 0.0084
Epoch [3/10], Step [200/600], Loss: 0.0037
Epoch [3/10], Step [300/600], Loss: 0.1415
Epoch [3/10], Step [400/600], Loss: 0.0047
Epoch [3/10], Step [500/600], Loss: 0.1029
Epoch [3/10], Step [600/600], Loss: 0.0043
Epoch [4/10], Step [100/600], Loss: 0.0089
Epoch [4/10], Step [200/600], Loss: 0.0851
Epoch [4/10], Step [300/600], Loss: 0.0146
Epoch [4/10], Step [400/600], Loss: 0.0136
Epoch [4/10], Step [500/600], Loss: 0.0480
Epoch [4/10