Import and preparing of datasets

In [26]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import time
from sklearn.metrics import classification_report


# Check Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define Hyper-parameters
input_size = 784
num_classes = 10
num_epochs = 20
batch_size = 100
learning_rate = 0.01

# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='../../data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='../../data',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)


Network #1: 4-layer network
a. Layer 1 – 20 neurons
b. Layer 2 – 50 neurons
c. Layer 3 – 20 neurons
d. Layer 4 – output neuron with softmax activation

In [27]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NeuralNet, self).__init__()
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        self.fc1 = nn.Linear(input_size, 20)
        self.fc2 = nn.Linear(20, 50)
        self.fc3 = nn.Linear(50, 20)
        self.fc4 = nn.Linear(20, 10)

    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.relu(self.fc2(out))
        out = self.relu(self.fc3(out))
        out = self.softmax(self.fc4(out))
        return out

model = NeuralNet(input_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
start_time = time.time()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backprpagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
end_time = time.time()
total_time = end_time - start_time


# Test the model
# In the test phase, don't need to compute gradients (for memory efficiency)
y_true = list()
y_pred = list()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        for i in range(len(outputs)):
            y_true.append(labels[i].item())
            y_pred.append(predicted[i].cpu())

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total),f"Total training time: {total_time:.2f} seconds")
print(classification_report(y_true, y_pred,target_names=["0","1","2","3","4","5","6","7","8","9"], digits=4))


Epoch [1/20], Step [100/600], Loss: 1.6554
Epoch [1/20], Step [200/600], Loss: 1.7440
Epoch [1/20], Step [300/600], Loss: 1.5787
Epoch [1/20], Step [400/600], Loss: 1.6417
Epoch [1/20], Step [500/600], Loss: 1.6933
Epoch [1/20], Step [600/600], Loss: 1.6418
Epoch [2/20], Step [100/600], Loss: 1.6432
Epoch [2/20], Step [200/600], Loss: 1.6489
Epoch [2/20], Step [300/600], Loss: 1.6435
Epoch [2/20], Step [400/600], Loss: 1.6587
Epoch [2/20], Step [500/600], Loss: 1.6516
Epoch [2/20], Step [600/600], Loss: 1.6257
Epoch [3/20], Step [100/600], Loss: 1.6396
Epoch [3/20], Step [200/600], Loss: 1.6893
Epoch [3/20], Step [300/600], Loss: 1.5933
Epoch [3/20], Step [400/600], Loss: 1.6405
Epoch [3/20], Step [500/600], Loss: 1.5708
Epoch [3/20], Step [600/600], Loss: 1.7212
Epoch [4/20], Step [100/600], Loss: 1.6263
Epoch [4/20], Step [200/600], Loss: 1.7010
Epoch [4/20], Step [300/600], Loss: 1.6308
Epoch [4/20], Step [400/600], Loss: 1.6524
Epoch [4/20], Step [500/600], Loss: 1.6412
Epoch [4/20

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


2. Network #2: 6- layer network
a. Layer 1 – 10 neurons
b. Layer 2 – 20 neurons
c. Layer 3 – 30 neurons
d. Layer 4 – 20 neurons
e. Layer 5 – 10 neurons
f. Layer 6 – output neuron with softmax activation

In [28]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NeuralNet, self).__init__()
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        self.fc1 = nn.Linear(input_size, 10)
        self.fc2 = nn.Linear(10, 20)
        self.fc3 = nn.Linear(20, 30)
        self.fc4 = nn.Linear(30, 20)
        self.fc5 = nn.Linear(20, 10)
        self.fc6 = nn.Linear(10, 10)

    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.relu(self.fc2(out))
        out = self.relu(self.fc3(out))
        out = self.relu(self.fc4(out))
        out = self.relu(self.fc5(out))
        out = self.softmax(self.fc6(out))
        return out

model = NeuralNet(input_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
start_time = time.time()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backprpagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
end_time = time.time()
total_time = end_time - start_time


# Test the model
# In the test phase, don't need to compute gradients (for memory efficiency)
y_true = list()
y_pred = list()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        for i in range(len(outputs)):
            y_true.append(labels[i].item())
            y_pred.append(predicted[i].cpu())

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total),f"Total training time: {total_time:.2f} seconds")
print(classification_report(y_true, y_pred,target_names=["0","1","2","3","4","5","6","7","8","9"], digits=4))

Epoch [1/20], Step [100/600], Loss: 1.9904
Epoch [1/20], Step [200/600], Loss: 1.7853
Epoch [1/20], Step [300/600], Loss: 1.8608
Epoch [1/20], Step [400/600], Loss: 1.7279
Epoch [1/20], Step [500/600], Loss: 1.7308
Epoch [1/20], Step [600/600], Loss: 1.7964
Epoch [2/20], Step [100/600], Loss: 1.6663
Epoch [2/20], Step [200/600], Loss: 1.9329
Epoch [2/20], Step [300/600], Loss: 1.7641
Epoch [2/20], Step [400/600], Loss: 1.8159
Epoch [2/20], Step [500/600], Loss: 1.6709
Epoch [2/20], Step [600/600], Loss: 1.7202
Epoch [3/20], Step [100/600], Loss: 1.7409
Epoch [3/20], Step [200/600], Loss: 1.7414
Epoch [3/20], Step [300/600], Loss: 1.6634
Epoch [3/20], Step [400/600], Loss: 1.6701
Epoch [3/20], Step [500/600], Loss: 1.6814
Epoch [3/20], Step [600/600], Loss: 1.8298
Epoch [4/20], Step [100/600], Loss: 1.7888
Epoch [4/20], Step [200/600], Loss: 1.8806
Epoch [4/20], Step [300/600], Loss: 1.8410
Epoch [4/20], Step [400/600], Loss: 1.7211
Epoch [4/20], Step [500/600], Loss: 1.7109
Epoch [4/20

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


3. Network #3: 6- layer network
a. Layer 1 – 10 neurons
b. Layer 2 – 40 neurons
c. Layer 3 – 70 neurons
d. Layer 4 – 40 neurons
e. Layer 5 – 10 neurons
f. Layer 6 – output neuron with softmax activation

In [29]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, num_classes):
        super(NeuralNet, self).__init__()
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        self.fc1 = nn.Linear(input_size, 10)
        self.fc2 = nn.Linear(10, 40)
        self.fc3 = nn.Linear(40, 70)
        self.fc4 = nn.Linear(70, 40)
        self.fc5 = nn.Linear(40, 10)
        self.fc6 = nn.Linear(10, 10)

    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.relu(self.fc2(out))
        out = self.relu(self.fc3(out))
        out = self.relu(self.fc4(out))
        out = self.relu(self.fc5(out))
        out = self.softmax(self.fc6(out))
        return out

model = NeuralNet(input_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
start_time = time.time()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backprpagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
end_time = time.time()
total_time = end_time - start_time


# Test the model
# In the test phase, don't need to compute gradients (for memory efficiency)
y_true = list()
y_pred = list()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        for i in range(len(outputs)):
            y_true.append(labels[i].item())
            y_pred.append(predicted[i].cpu())

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total),f"Total training time: {total_time:.2f} seconds")
print(classification_report(y_true, y_pred,target_names=["0","1","2","3","4","5","6","7","8","9"], digits=4))

Epoch [1/20], Step [100/600], Loss: 2.0286
Epoch [1/20], Step [200/600], Loss: 1.9152
Epoch [1/20], Step [300/600], Loss: 1.8038
Epoch [1/20], Step [400/600], Loss: 1.8140
Epoch [1/20], Step [500/600], Loss: 1.8415
Epoch [1/20], Step [600/600], Loss: 1.8693
Epoch [2/20], Step [100/600], Loss: 1.9243
Epoch [2/20], Step [200/600], Loss: 1.8265
Epoch [2/20], Step [300/600], Loss: 2.0530
Epoch [2/20], Step [400/600], Loss: 2.0189
Epoch [2/20], Step [500/600], Loss: 2.1111
Epoch [2/20], Step [600/600], Loss: 2.2410
Epoch [3/20], Step [100/600], Loss: 2.3711
Epoch [3/20], Step [200/600], Loss: 2.3104
Epoch [3/20], Step [300/600], Loss: 2.1800
Epoch [3/20], Step [400/600], Loss: 2.1812
Epoch [3/20], Step [500/600], Loss: 2.2607
Epoch [3/20], Step [600/600], Loss: 2.1411
Epoch [4/20], Step [100/600], Loss: 2.1412
Epoch [4/20], Step [200/600], Loss: 2.2412
Epoch [4/20], Step [300/600], Loss: 2.2312
Epoch [4/20], Step [400/600], Loss: 2.2011
Epoch [4/20], Step [500/600], Loss: 2.2912
Epoch [4/20

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Which of the three models had the least amount of error for validation?
The first model. It has the best acurracy with the test dataset: 77.68%. The others model have a very poor performance compated with the first one.

How long it took to train each model?
Model 1: 159.65 seconds
Model 2: 170.02 seconds
Model 3: 168.29 seconds
