In [5]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

In [6]:
# Define transformations to apply to the data
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize((0.1307,), (0.3081,))  # Normalize the pixel values
])

# Download and load the training set
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test set
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=1000, shuffle=False)


In [7]:
Kernel_size = 5
N_channels = 64
P_1 = 0.25
P_2 = 0.5
in_features = 64 * 20 * 20
hidden_size =  128
output_size = 10
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Define the layers of the network
        self.Conv1 = nn.Conv2d(1, N_channels, Kernel_size, bias=False)  # Convolutional layer with 1 input channel, 32 output channels, and 3x3 kernel
        self.Conv2 = nn.Conv2d(N_channels, N_channels, Kernel_size, bias=False) # Convolutional layer with 32 input channels, 64 output channels, and 3x3 kernel
        self.Dropout1 = nn.Dropout(p=P_1)
        self.FC1 = nn.Linear(in_features, hidden_size)  # Fully connected layer with 64*6*6 input features and 128 output features
        self.Dropout2 = nn.Dropout(p=P_2)
        self.FC2 = nn.Linear(hidden_size, output_size)      # Fully connected layer with 128 input features and 10 output features (for 10 classes)

    def forward(self, x):
        # A good programmer always debugs using print statements
        # Define the forward pass through the network
        x = F.relu(self.Conv1(x))   # Apply convolution, then ReLU activation
        #print("shape after first conv", x.shape)
        x = F.relu(self.Conv2(x))   # Apply convolution, then ReLU activation
        #print("shape after second conv",x.shape)
        x = self.Dropout1(x)    #Apply dropout
        #print("shape after first dropout",x.shape)
        x = torch.flatten(x, start_dim=1) # Reshape the tensor for the fully connected layer
        #print("shape after flattening",x.shape)
        x = F.relu(self.FC1(x))     # Apply ReLU activation to the first fully connected layer
        #print("shape after first FC",x.shape)
        x = self.Dropout2(x)    #Apply dropout
        #print("shape after second dropout",x.shape)
        x = self.FC2(x)      # Apply the second fully connected layer, then Softmax
        x = F.softmax(x, dim=1)
        #print("shape after second FC",x.shape)
        return x


In [8]:
# Define hyperparameters
learning_rate = 0.01
epochs = 10

# Create the model
model = Net()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
scheduler_lr = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# For speed
if torch.cuda.is_available():
      device = torch.device('cuda:0')
else:
      device = torch.device('cpu')

# Start training loop
for epoch in range(epochs):
  # Track total loss and correct predictions for the epoch
  total_loss = 0.0
  correct_predictions = 0
  model.train()
  model.to(device)

  for i, data in enumerate(trainloader):
    images, labels = data
    images, labels = images.to(device), labels.to(device)

    # Forward pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Update total loss and calculate accuracy
    total_loss += loss.item()
    _, predicted = torch.max(outputs.data, 1)  # Get the index of the predicted class
    correct_predictions += (predicted == labels).sum().item()  # Count correct predictions

  # Learning rate decay step
  scheduler_lr.step()
  # Calculate and print accuracy
  total_images = len(trainloader.dataset)  # Assuming trainloader keeps track of total images
  accuracy = 100 * correct_predictions / total_images
  print(f'Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {accuracy:.2f}%')

Epoch [1/10], Loss: 1655.3059, Accuracy: 73.30%
Epoch [2/10], Loss: 1483.3491, Accuracy: 88.84%
Epoch [3/10], Loss: 1462.8257, Accuracy: 90.81%
Epoch [4/10], Loss: 1451.1748, Accuracy: 91.90%
Epoch [5/10], Loss: 1443.9199, Accuracy: 92.64%
Epoch [6/10], Loss: 1435.4169, Accuracy: 93.50%
Epoch [7/10], Loss: 1433.2916, Accuracy: 93.77%
Epoch [8/10], Loss: 1431.9012, Accuracy: 93.98%
Epoch [9/10], Loss: 1431.4898, Accuracy: 93.99%
Epoch [10/10], Loss: 1430.8010, Accuracy: 94.03%


In [9]:
# Evaluate the model on the test set
model.eval()  # Set the model to evaluation mode
test_correct_predictions = 0

# Disable gradient computation during evaluation
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        test_correct_predictions += (predicted == labels).sum().item()

# Calculate test accuracy
test_total_images = len(testloader.dataset)
test_accuracy = 100 * test_correct_predictions / test_total_images
print(f'Test Accuracy: {test_accuracy:.2f}%')


Test Accuracy: 95.83%


In [12]:
# Save model parameters
torch.save(model.state_dict(), 'model_a_test.pth')

# Load model parameters
# Make sure to create an instance of the model before loading parameters
model = Net()
model.load_state_dict(torch.load('model_parameters_A.pth'))


<All keys matched successfully>