In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
from torchvision import transforms

# Notebook-wide configuration: dataset directory and compact tensor printing.
data_path = '../data-unversioned/p1ch7/'
torch.set_printoptions(linewidth=75, edgeitems=2)

In [None]:
# Fetch both CIFAR-10 splits into data_path (download=True on each call).
# The train=True split is bound to `cifar10` (training dataset) and the
# train=False split to `cifar10_val` (dataset for validation), in that order.
cifar10, cifar10_val = (
    datasets.CIFAR10(data_path, train=is_train, download=True)
    for is_train in (True, False)
)

In [5]:
# Class definitions: the ten category names used with CIFAR-10.
# NOTE(review): assumed to be listed in label-index order -- confirm against the dataset.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

In [6]:
# Building the normalized training and validation datasets...

# Per-channel mean / standard deviation handed to transforms.Normalize.
_CHANNEL_MEAN = (0.4915, 0.4823, 0.4468)
_CHANNEL_STD = (0.2470, 0.2435, 0.2616)


def _load_normalized_cifar10(train):
    """Return the CIFAR-10 split selected by `train` with the shared preprocessing.

    Each sample goes through ToTensor() followed by Normalize() with the
    channel statistics above. `download=False`, so the files are expected to
    already exist under `data_path`.
    """
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
    ])
    return datasets.CIFAR10(
        data_path, train=train, download=False, transform=pipeline)


# Training data...
cifar10_Train = _load_normalized_cifar10(train=True)

# Validation data...
cifar10_Val = _load_normalized_cifar10(train=False)


In [7]:
# Training configuration: SGD step size, number of passes over the data,
# and the classification loss.
learning_Rate = 1e-2
num_epochs = 300
loss_fnc = nn.CrossEntropyLoss()

# Fully connected classifier: 3072 -> 1024 -> 512 -> 128 -> 10,
# with a Tanh after every Linear layer except the last one.
layer_sizes = (3072, 1024, 512, 128, 10)
stack = []
for n_in, n_out in zip(layer_sizes, layer_sizes[1:]):
    stack += [nn.Linear(n_in, n_out), nn.Tanh()]
model22 = nn.Sequential(*stack[:-1])  # drop the Tanh that would follow the output layer

# Plain SGD over every parameter of the model.
optimizer = optim.SGD(params=model22.parameters(), lr=learning_Rate)

## Training Observations:

- It took about 2h 13m 46s to train for 300 epochs on Colab, with GPU acceleration.
- It took about 1h 35m 30s to train for 200 epochs on Colab, with GPU acceleration.
- Training loss at 200 epochs is less than 0.000425.
- Training loss at 300 epochs is about 0.000234.

In [6]:
# Dividing dataset into mini-batches (64 samples each, reshuffled every epoch)...
train_mbload = torch.utils.data.DataLoader(cifar10_Train, batch_size = 64, shuffle=True)

# Run every batch on whichever device currently holds the model's parameters.
# For a CPU model the .to(device) calls below return the tensors unchanged.
device = next(model22.parameters()).device

# Training loop...
for epoch in range(num_epochs):
    for imgs, labels in train_mbload:
        imgs = imgs.to(device)
        labels = labels.to(device)
        # Flatten each image to one vector per sample before the Linear layers.
        outputs = model22(imgs.view(imgs.shape[0], -1))
        loss = loss_fnc(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Report on epoch 1, every 30th epoch, and the final epoch. The final epoch
    # is derived from num_epochs so the report follows the hyperparameter.
    # `loss` here is the loss of the last mini-batch of the epoch, not an epoch mean.
    if epoch == 1 or epoch % 30 == 0 or epoch == num_epochs - 1:
        print("Epoch: %d, Loss: %f" % (epoch, float(loss)))

Epoch: 0, Loss: 1.772933
Epoch: 1, Loss: 2.091775
Epoch: 30, Loss: 0.569241
Epoch: 60, Loss: 0.006395
Epoch: 90, Loss: 0.002145
Epoch: 120, Loss: 0.001343
Epoch: 150, Loss: 0.000281
Epoch: 180, Loss: 0.000425
Epoch: 210, Loss: 0.000318
Epoch: 240, Loss: 0.000340
Epoch: 270, Loss: 0.000194
Epoch: 299, Loss: 0.000234


## Accuracy and Analysis:

- As shown in the output below, the training accuracy is 1.000 and the validation accuracy is 0.4730.
- It is apparent from these accuracy values that model22 is overfitting: it has perfect accuracy on the training set and poor accuracy on the validation set.
- One reason for the poor accuracy on the validation set is that the model might have memorized the training set, fixing its parameter values to what it learned from those specific examples.

In [7]:
# Checking the accuracy of model...
train_accuload = torch.utils.data.DataLoader(cifar10_Train, batch_size = 64, shuffle=False)
Valid_accuload = torch.utils.data.DataLoader(cifar10_Val, batch_size = 64, shuffle=False)


def count_correct(model, loader):
    """Count correct predictions of `model` over every batch yielded by `loader`.

    Each image batch is flattened to (batch, -1) before the forward pass and
    the predicted class is the index of the largest output along dim 1.

    Args:
        model: module called on the flattened image batch.
        loader: iterable of (imgs, labels) batches.

    Returns:
        (correct, total): number of matching predictions and number of labels seen.
    """
    # Follow the model's device so the helper works wherever the model lives.
    device = next(model.parameters()).device
    correct = 0
    total = 0
    # Evaluate in eval mode, then restore whatever mode the model was in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device)
                labels = labels.to(device)
                scores = model(imgs.view(imgs.shape[0], -1))
                _, predicted = torch.max(scores, dim=1)
                total += labels.shape[0]
                correct += int((predicted == labels).sum())
    finally:
        model.train(was_training)
    return correct, total


# Checking on the training dataset...
training_correct, training_total = count_correct(model22, train_accuload)
print('Training Accuracy: %f' % (training_correct/training_total))

# Checking on the validation dataset...
validation_correct, validation_total = count_correct(model22, Valid_accuload)
print('Validation Accuracy: %f' % (validation_correct/validation_total))

Training Accuracy: 1.000000
Validation Accuracy: 0.473000


## Model Size: 

- The total number of parameters in model22 is 3738506.
- Hidden layer 1 -> 3145728 weights, 1024 biases.
- Hidden layer 2 -> 524288 weights, 512 biases.
- Hidden layer 3 -> 65536 weights, 128 biases.
- Output layer -> 1280 weights, 10 biases.

In [9]:
# Model size: element count of every trainable parameter tensor, in the order
# model22.parameters() yields them.

Parameter_List = []
for para in model22.parameters():
    if para.requires_grad:
        Parameter_List.append(para.numel())

# Cell output: (total parameter count, per-tensor counts).
(sum(Parameter_List), Parameter_List)

(3738506, [3145728, 1024, 524288, 512, 65536, 128, 1280, 10])

## Comparing with problem 1:

- Compared to model12 in problem 1, model22 in this problem is far more complex.
- The validation accuracy of model22 in this problem is considerably lower than that of problem 1's model12. In addition, model22 shows overfitting, in contrast to problem 1's model12.
- The nature of the data in the two problems' datasets is quite different, which makes a major difference in validation accuracy.