In [2]:
import torch as t
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import torchvision
from torchvision import datasets, transforms

In [3]:
# Fetch Fashion-MNIST through torchvision (downloaded into FashionMNIST_data/).
# Every image is converted to a tensor and normalised with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
# The training split is built first, then the test split.
trainSet, testSet = (
    datasets.FashionMNIST('FashionMNIST_data/', download=True, train=is_train, transform=transform)
    for is_train in (True, False)
)

In [4]:
#Create 3 separate models having the following configuration:
#a. 784-256-10
#b. 784-203-203-10
#c. 784-176-176-176-10

#Define the model class
class Model_a(nn.Module):
    """Fully connected 784-256-10 classifier with a single ReLU hidden layer."""

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that seeded
        # parameter initialisation is unaffected.
        self.fc1 = nn.Linear(in_features=784, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return the 10 raw class scores."""
        flat = x.view(-1, 784)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
    

class Model_b(nn.Module):
    """Fully connected 784-203-203-10 classifier with two ReLU hidden layers."""

    def __init__(self):
        super().__init__()
        # Creation order (fc1, fc2, fc3) is preserved for seeded initialisation.
        self.fc1 = nn.Linear(in_features=784, out_features=203)
        self.fc2 = nn.Linear(in_features=203, out_features=203)
        self.fc3 = nn.Linear(in_features=203, out_features=10)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return the 10 raw class scores."""
        activations = x.view(-1, 784)
        # Both hidden layers are followed by ReLU; the output layer is not.
        for hidden_layer in (self.fc1, self.fc2):
            activations = F.relu(hidden_layer(activations))
        return self.fc3(activations)
    

class Model_c(nn.Module):
    """Fully connected 784-176-176-176-10 classifier with three ReLU hidden layers."""

    def __init__(self):
        super().__init__()
        # Creation order (fc1 .. fc4) is preserved for seeded initialisation.
        self.fc1 = nn.Linear(in_features=784, out_features=176)
        self.fc2 = nn.Linear(in_features=176, out_features=176)
        self.fc3 = nn.Linear(in_features=176, out_features=176)
        self.fc4 = nn.Linear(in_features=176, out_features=10)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return the 10 raw class scores."""
        activations = x.view(-1, 784)
        # The three hidden layers are followed by ReLU; the output layer is not.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            activations = F.relu(hidden_layer(activations))
        return self.fc4(activations)

In [5]:
#Defining the training function

def train(model, trainSet, lr, epochs, batch_size, momentum=0.9):
    """Train ``model`` on ``trainSet`` with SGD and return the per-epoch loss.

    Args:
        model: ``nn.Module`` whose output is passed, together with the batch
            labels, to ``nn.CrossEntropyLoss``.
        trainSet: dataset yielding ``(images, labels)`` pairs; it is wrapped in
            a shuffling ``DataLoader``.
        lr: learning rate handed to ``optim.SGD``.
        epochs: number of full passes over ``trainSet``.
        batch_size: mini-batch size used by the ``DataLoader``.
        momentum: SGD momentum; defaults to 0.9, the value that used to be
            hard-coded.

    Returns:
        list: one entry per epoch with the mean batch loss of that epoch.
        Values above 1 are stored as 1 so that a diverging run does not
        dominate a shared plot axis. A NaN loss is stored unchanged because
        ``nan > 1`` evaluates to False.
    """
    # Only the training data is needed here. The previous version also built
    # an unused loader from the global ``testSet``, which tied the function to
    # notebook state.
    train_loader = t.utils.data.DataLoader(trainSet, batch_size=batch_size, shuffle=True)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    training_loss = list()
    model.train()
    for e in range(epochs):
        running_loss = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # Mean loss per batch for this epoch.
        e_loss = running_loss / len(train_loader)
        training_loss.append(1 if e_loss > 1 else e_loss)
        print("Epoch: {}/{}.. ".format(e+1, epochs),"Training Loss: {:.3f}.. ".format(e_loss))
    return training_loss
      


In [None]:

# Train each model with the following learning rates (use SGD with momentum=0.9)
# with batch size set to 64:
# a. 0.0001
# b. 0.001
# c. 0.01
# d. 0.1
# e. 1

lr = [0.0001, 0.001, 0.01, 0.1, 1]
epochs = 30
batch_size = 64

# Every learning rate is run on a freshly initialised network. Reusing one
# instance per architecture would make each run continue from the weights left
# behind by the previous learning rate, so the loss curves would not be
# comparable. After each loop, model_a / model_b / model_c refer to the network
# trained with the last learning rate in the list.

print("-----------------------------------------------Model_a----------------------------------------------------")
lossa_3 = list()
for i in lr:
    print("Learning Rate: ", i)
    model_a = Model_a()
    lossa_3.append(train(model_a, trainSet, i, epochs, batch_size))


print("-----------------------------------------------Model_b----------------------------------------------------")
lossb_3 = list()
for i in lr:
    print("Learning Rate: ", i)
    model_b = Model_b()
    lossb_3.append(train(model_b, trainSet, i, epochs, batch_size))


print("-----------------------------------------------Model_c----------------------------------------------------")
lossc_3 = list()
for i in lr:
    print("Learning Rate: ", i)
    model_c = Model_c()
    lossc_3.append(train(model_c, trainSet, i, epochs, batch_size))

In [None]:
# Now fix the learning rate at 0.01 and try the following different batch sizes:
# a. 16
# b. 64
# c. 256
# d. 1024
# e. 2048

lr = 0.01
epochs = 20
batch_size = [16, 64, 256, 1024, 2048]

# Every batch size is run on a freshly initialised network. Reusing one
# instance per architecture would make each run continue from the weights left
# behind by the previous batch size, so the results would not be comparable.
# After each loop, model_a / model_b / model_c refer to the network trained
# with the last batch size in the list.

print("-----------------------------------------------Model_a----------------------------------------------------")
lossa_4 = list()
for i in batch_size:
    print("Batch Size: ", i)
    model_a = Model_a()
    lossa_4.append(train(model_a, trainSet, lr, epochs, i))

print("-----------------------------------------------Model_b----------------------------------------------------")
lossb_4 = list()
for i in batch_size:
    print("Batch Size: ", i)
    model_b = Model_b()
    lossb_4.append(train(model_b, trainSet, lr, epochs, i))

print("-----------------------------------------------Model_c----------------------------------------------------")
lossc_4 = list()
for i in batch_size:
    print("Batch Size: ", i)
    model_c = Model_c()
    lossc_4.append(train(model_c, trainSet, lr, epochs, i))

In [None]:
# For each experiment in point 3, plot the training loss vs epochs graph. Only one graph 
# should be generated per model containing 5 different plots with corresponding 
# learning rates clearly labelled

# One figure per model: training loss against epoch, with one labelled curve
# for each of the five learning rates (in the order they were trained).
for model_title, loss_curves in (('Model_a', lossa_3), ('Model_b', lossb_3), ('Model_c', lossc_3)):
    for curve_idx, curve_label in enumerate(('0.0001', '0.001', '0.01', '0.1', '1')):
        plt.plot(loss_curves[curve_idx], label=curve_label)
    plt.legend()
    plt.xlabel('Epochs')
    plt.ylabel('Training Loss')
    plt.title(model_title)
    plt.show()


In [None]:
# For point 4, report the validation accuracy for each model in tabular form.

def test_model(model, testSet, batch_size):
    testset = t.utils.data.DataLoader(testSet, batch_size=batch_size, shuffle=True)
    correct = 0
    total = 0
    with t.no_grad():
        for images, labels in testset:
            outputs = model(images)
            _, predicted = t.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return (100 * correct / total)



# The accuracies must come from networks trained with each batch size.
# Previously, freshly constructed (untrained) models were evaluated here, so the
# reported numbers did not reflect the batch-size experiment. A new model is
# therefore trained for every batch size, using the `lr` and `epochs` currently
# set for that experiment, and validated right afterwards.

# model_a

print("-----------------------------------------------Model_a----------------------------------------------------")
acc_a = list()
for i in batch_size:
    print("Batch Size: ", i)
    model_a = Model_a()
    train(model_a, trainSet, lr, epochs, i)
    acc = test_model(model_a, testSet, i)
    acc_a.append(acc)
    print("Validation Accuracy: ", acc)


# model_b

print("-----------------------------------------------Model_b----------------------------------------------------")
acc_b = list()
for i in batch_size:
    print("Batch Size: ", i)
    model_b = Model_b()
    train(model_b, trainSet, lr, epochs, i)
    acc = test_model(model_b, testSet, i)
    acc_b.append(acc)
    print("Validation Accuracy: ", acc)

# model_c

print("-----------------------------------------------Model_c----------------------------------------------------")
acc_c = list()
for i in batch_size:
    print("Batch Size: ", i)
    model_c = Model_c()
    train(model_c, trainSet, lr, epochs, i)
    acc = test_model(model_c, testSet, i)
    acc_c.append(acc)
    print("Validation Accuracy: ", acc)

# Tabular summary: one row per batch size, one column per model.
print("{:<12}{:>10}{:>10}{:>10}".format("Batch Size", "Model_a", "Model_b", "Model_c"))
for i, (row_a, row_b, row_c) in zip(batch_size, zip(acc_a, acc_b, acc_c)):
    print("{:<12}{:>10.2f}{:>10.2f}{:>10.2f}".format(i, row_a, row_b, row_c))

In [None]:
# Finally, swap the model in 2(a) for the wider model 784-512-10. Train this model with
# learning rate 0.01 and a batch size of 64. Report the validation accuracy of the
# new model as well as the validation accuracy of the model in 2(a) trained using the
# same learning rate and batch size.

# Hyperparameters for the wider-model comparison: learning rate, epochs, batch size.
lr, epochs, batch_size = 0.01, 30, 64

# model_d

class Model_d(nn.Module):
    """Fully connected 784-512-10 classifier with a single ReLU hidden layer."""

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that seeded
        # parameter initialisation is unaffected.
        self.fc1 = nn.Linear(in_features=784, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and return the 10 raw class scores."""
        flat = x.view(-1, 784)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Train the wider 784-512-10 network and report its validation accuracy.
print("-----------------------------------------------Model_d----------------------------------------------------")
acc_d = list()
model_d = Model_d()
lossa_5 = list()
lossa_5.append(train(model_d, trainSet, lr, epochs, batch_size))
acc = test_model(model_d, testSet, batch_size)
acc_d.append(acc)
print("Validation Accuracy (784-512-10): ", acc)

# Baseline for the comparison: the 784-256-10 network from 2(a), trained from
# scratch with the same learning rate, epoch count and batch size.
print("-----------------------------------------------Model_a----------------------------------------------------")
model_a_baseline = Model_a()
loss_a_baseline = train(model_a_baseline, trainSet, lr, epochs, batch_size)
acc_a_baseline = test_model(model_a_baseline, testSet, batch_size)
print("Validation Accuracy (784-256-10): ", acc_a_baseline)

In [None]:
# Now answer the following questions based on your experiments:
# The answers are stored as plain strings; nothing in the visible code reads them.

# How does increasing the learning rate affect the training loss? Why?

# NOTE(review): the opening sentence says the loss decreases as the learning
# rate increases, while the same answer later says a too-large learning rate
# prevents convergence - reconcile this with the measured loss curves.
ans1 = "The training loss decreases with increase in learning rate. This is because the learning rate is the step size of the gradient descent algorithm. If the learning rate is too small, the model will take a lot of time to converge to the minimum of the loss function. If the learning rate is too large, the model will not be able to converge to the minimum of the loss function. Hence, the learning rate should be chosen such that the model converges to the minimum of the loss function in the least number of epochs."

# How does increasing the batch size affect the validation accuracy? Why?

# NOTE(review): the justification discusses convergence of the loss rather than
# validation accuracy, and the opening claim conflicts with the later statement
# about too-large batch sizes - confirm against the measured accuracies.
ans2 = "The validation accuracy increases with increase in batch size. This is because the batch size is the number of samples that are used to update the weights of the model. If the batch size is too small, the model will take a lot of time to converge to the minimum of the loss function. If the batch size is too large, the model will not be able to converge to the minimum of the loss function. Hence, the batch size should be chosen such that the model converges to the minimum of the loss function in the least number of epochs."

# How does increasing depth affect validation accuracy? Why?

# NOTE(review): stated trend should be confirmed against the accuracies
# measured for Model_a, Model_b and Model_c.
ans3 = "The validation accuracy increases with increase in depth. This is because the depth of the model is the number of layers in the model. If the depth is too small, the model will not be able to learn the complex patterns in the data. If the depth is too large, the model will overfit the data. Hence, the depth should be chosen such that the model is able to learn the complex patterns in the data without overfitting the data."

# How does increasing the number of parameters affect validation accuracy? Why?

# NOTE(review): stated trend should be confirmed against the accuracies
# measured for the 784-256-10 and 784-512-10 models.
ans4 = "The validation accuracy increases with increase in the number of parameters. This is because the number of parameters is the number of weights in the model. If the number of parameters is too small, the model will not be able to learn the complex patterns in the data. If the number of parameters is too large, the model will overfit the data. Hence, the number of parameters should be chosen such that the model is able to learn the complex patterns in the data without overfitting the data."
