In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 128)  # First fully connected layer
        self.fc2 = nn.Linear(128, 64)     # Second fully connected layer
        self.fc3 = nn.Linear(64, 10)      # Output layer (10 classes for 10 digits)

    def forward(self, x):
        x = x.view(-1, 28*28)   # Flatten the input tensor
        x = F.relu(self.fc1(x)) # Apply ReLU activation
        x = F.relu(self.fc2(x)) # Apply ReLU activation
        x = self.fc3(x)         # Output layer (no activation here, it's included in loss function)
        return x

model = Net()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

num_epochs = 5
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()  # Zero the gradient buffers
        output = model(data)    # Forward pass
        loss = criterion(output, target)  # Compute loss
        loss.backward()         # Backpropagate the loss
        optimizer.step()        # Update weights
        running_loss += loss.item()

    print(f'Epoch {epoch + 1}, Loss: {running_loss/len(train_loader)}')

model.eval()  # Set the model to evaluation mode
correct = 0
total = 0
with torch.no_grad():  # No need to calculate gradients during evaluation
    for data, target in test_loader:
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total}%')

torch.save(model.state_dict(), 'mnist_model.pth')

model = Net()
model.load_state_dict(torch.load('mnist_model.pth'))
model.eval()

with torch.no_grad():
    sample_data, sample_target = test_dataset[0]
    sample_output = model(sample_data.unsqueeze(0))  # Add batch dimension
    _, predicted_label = torch.max(sample_output, 1)
    print(f'Predicted Label: {predicted_label.item()}')


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:09<00:00, 1054688.43it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 69466.51it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:02<00:00, 582654.53it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<?, ?it/s]


Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw

Epoch 1, Loss: 0.32029556106926915
Epoch 2, Loss: 0.1135892524644097
Epoch 3, Loss: 0.07730672735649385
Epoch 4, Loss: 0.057949916036404384
Epoch 5, Loss: 0.0462727448277509
Accuracy of the network on the test images: 97.48%
Predicted Label: 7


  model.load_state_dict(torch.load('mnist_model.pth'))
