In [1]:
import torch
from torch import nn
from load_mnist import load_mnist
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(0)

## 1.1

In [10]:
def training_curve_plot(title, train_losses, test_losses, train_accuracy, test_accuracy):
    """ 
    convenience function for plotting train and test loss and accuracy
    """
    lg=13
    md=10
    sm=9
    fig, axs = plt.subplots(1, 2, figsize=(12, 4))
    fig.suptitle(title, fontsize=lg)
    x = range(1, len(train_losses)+1)
    axs[0].plot(x, train_losses, label=f'Final train loss: {train_losses[-1]:.4f}')
    axs[0].plot(x, test_losses, label=f'Final test loss: {test_losses[-1]:.4f}')
    axs[0].set_title('Losses', fontsize=md)
    axs[0].set_xlabel('Iteration', fontsize=md)
    axs[0].set_ylabel('Loss', fontsize=md)
    axs[0].legend(fontsize=sm)
    axs[0].tick_params(axis='both', labelsize=sm)
    # Optionally use a logarithmic y-scale
    #axs[0].set_yscale('log')
    axs[0].grid(True, which="both", linestyle='--', linewidth=0.5)
    axs[1].plot(x, train_accuracy, label=f'Final train accuracy: {train_accuracy[-1]:.4f}%')
    axs[1].plot(x, test_accuracy, label=f'Final test accuracy: {test_accuracy[-1]:.4f}%')
    axs[1].set_title('Accuracy', fontsize=md)
    axs[1].set_xlabel('Iteration', fontsize=md)
    axs[1].set_ylabel('Accuracy (%)', fontsize=sm)
    axs[1].legend(fontsize=sm)
    axs[1].tick_params(axis='both', labelsize=sm)
    axs[1].grid(True, which="both", linestyle='--', linewidth=0.5)
    plt.show()

In [3]:
#Network architecture
# Implement a 2-layer neural network
class Net(nn.Module):
    def __init__(self, input_size, hidden_size_1, hidden_size_2, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size_1)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size_1, hidden_size_2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size_2, num_classes)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
       
        return out

In [43]:


#Load MNIST dataset
x_train, y_train, x_test, y_test = load_mnist()

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
X_train_3Dtensor = torch.tensor(x_train, dtype=torch.float32).view(-1, 1, 28, 28)
Y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(x_test, dtype=torch.float32)
X_train_3Dtensor = torch.tensor(x_train, dtype=torch.float32).view(-1, 1, 28, 28)
Y_test_tensor = torch.tensor(y_test, dtype=torch.long)

train_set = torch.utils.data.TensorDataset(X_train_tensor, Y_train_tensor)
test_set = torch.utils.data.TensorDataset(X_test_tensor, Y_test_tensor)

train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=False)

train_3d = torch.utils.data.TensorDataset(X_train_3Dtensor, Y_train_tensor)
train_loader_3d = torch.utils.data.DataLoader(train_3d, batch_size=64, shuffle=True)
test_3d = torch.utils.data.TensorDataset(X_train_3Dtensor, Y_train_tensor)
test_loader_3d = torch.utils.data.DataLoader(test_3d, batch_size=64, shuffle=False)

In [44]:
def train_network(model: nn.Module, criteria: nn.Module, 
                  optimizer: torch.optim.Optimizer, num_epochs: int, train_loader: torch.utils.data.DataLoader, 
                  test_loader: torch.utils.data.DataLoader, device: torch.device):
    """
    Train a neural network model
    
    Args:
        model: The neural network model to be trained
        criteria: The loss function
        optimizer: The optimizer
        num_epochs: The number of epochs to train the model
        train_loader: The training data loader
        test_loader: The test data loader
        device: The device to run the model on
    """
    train_costs = []
    test_costs = []
    train_accuracy = []
    test_accuracy = []

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for i, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)
            
            
            outputs = model(images)
            loss = criteria(outputs, torch.argmax(labels, dim=1))
            train_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(torch.argmax(labels, dim=1)).sum().item()
        
        train_costs.append(train_loss / len(train_loader))
        train_accuracy.append(100 * correct / total)
    
        model.eval()
        test_loss = 0
        correct = 0
        total = 0

        with torch.no_grad():
            for i, (images, labels) in enumerate(test_loader):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criteria(outputs, torch.argmax(labels, dim=1))
                test_loss += loss.item()

                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(torch.argmax(labels, dim=1)).sum().item()
            
            test_costs.append(test_loss / len(test_loader))
            test_accuracy.append(100 * correct / total)
        
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_costs[-1]:.4f}, Test Loss: {test_costs[-1]:.4f}, Train Accuracy: {train_accuracy[-1]:.4f}%, Test Accuracy: {test_accuracy[-1]:.4f}%')
    
    training_curve_plot('2-layer Neural Network', train_costs, test_costs, train_accuracy, test_accuracy)

            
    

In [8]:
#Hyperparameters
input_size = 28*28
hidden_size_1 = 512
hidden_size_2 = 256
num_classes = 10
num_epochs = 100
learning_rate = 0.01

In [48]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Net(input_size, hidden_size_1, hidden_size_2, num_classes).to(device)
criteria = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

train_network(model, criteria, optimizer, num_epochs, train_loader, test_loader, device)

NameError: name 'Net' is not defined

In [49]:
class mlconvnet(nn.Module):
    def __init__(self):
        super(mlconvnet, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, 3, 1, 1)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(8, 16, 3, 1, 1)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(2, 2)
        
        self.conv3 = nn.Conv2d(16, 32, 3, 1, 1)
        self.relu3 = nn.ReLU()
        self.fc1 = nn.Linear(32*7*7, 10)
        
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        x = self.conv3(x)
        x = self.relu3(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)

        return x

In [52]:
# Create a multi layer conv net
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
convenet = mlconvnet().to(device)
criteria = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(convenet.parameters(), lr=learning_rate)

train_network(convenet, criteria, optimizer, num_epochs, train_loader_3d, test_loader_3d, device)

Epoch 1/100, Train Loss: 1.2362, Test Loss: 0.4683, Train Accuracy: 61.2417%, Test Accuracy: 84.5750%
Epoch 2/100, Train Loss: 0.2826, Test Loss: 0.2156, Train Accuracy: 91.4117%, Test Accuracy: 93.5600%
Epoch 3/100, Train Loss: 0.1887, Test Loss: 0.1617, Train Accuracy: 94.3600%, Test Accuracy: 95.1950%
Epoch 4/100, Train Loss: 0.1423, Test Loss: 0.1261, Train Accuracy: 95.7467%, Test Accuracy: 96.3800%
Epoch 5/100, Train Loss: 0.1155, Test Loss: 0.1003, Train Accuracy: 96.4933%, Test Accuracy: 97.0417%
Epoch 6/100, Train Loss: 0.0990, Test Loss: 0.0970, Train Accuracy: 96.9967%, Test Accuracy: 97.1117%
Epoch 7/100, Train Loss: 0.0875, Test Loss: 0.0794, Train Accuracy: 97.3700%, Test Accuracy: 97.5500%
Epoch 8/100, Train Loss: 0.0786, Test Loss: 0.0760, Train Accuracy: 97.6000%, Test Accuracy: 97.5600%
Epoch 9/100, Train Loss: 0.0718, Test Loss: 0.0621, Train Accuracy: 97.8117%, Test Accuracy: 98.1367%
Epoch 10/100, Train Loss: 0.0663, Test Loss: 0.0669, Train Accuracy: 97.9450%, Tes