# Imports


In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import torch
import numpy as np
print("Torch Version:",torch.__version__)
from torch import nn
import torch.nn.functional as F
from torchvision import datasets,transforms
import torch.optim as optim

Torch Version: 1.4.0


In [2]:
import sys,os
sys.path.insert(0,os.path.abspath("../"))
from src import Report, Config, HyperParameters

# Hyperparameters

In [3]:
# Single place for every tunable value used in this notebook:
#   * training_config  - batch size, SGD learning rate and number of epochs
#   * inference_config - batch size of the validation loader
#   * model_config     - convolution output channels and the class names
h_p = HyperParameters(
    training_config=Config(batch_size=64, lr=0.01, no_epochs=10),
    inference_config=Config(batch_size=64),
    model_config=Config(
        no_of_channels=10,
        classes=[
            "zero", "one", "two", "three", "four",
            "five", "six", "seven", "eight", "nine",
        ],
    ),
)


In [4]:
# Quick sanity check that the configuration holds the expected values.
print(f"Training batch size {h_p.training_config.batch_size}")
print(f"Number of classes {len(h_p.model_config.classes)}")

Training batch size 64
Number of classes 10


# Datasets and Dataloaders

In [5]:
# Preprocessing shared by both splits: convert to a tensor, then normalise
# the single channel with the mean / std constants below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# MNIST training split and held-out split; both are stored under the same
# local directory and fetched if missing (download=True).
trainset = datasets.MNIST('~/.pytorch/MNIST_data/', train=True,
                          transform=transform, download=True)
validset = datasets.MNIST('~/.pytorch/MNIST_data/', train=False,
                          transform=transform, download=True)

# Batch sizes come from the hyperparameter object so they are defined once.
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=h_p.training_config.batch_size,
    shuffle=True,
    num_workers=0,
)
valid_loader = torch.utils.data.DataLoader(
    validset,
    batch_size=h_p.inference_config.batch_size,
    shuffle=True,
    num_workers=0,
)

# Model Definition

In [6]:
class Net(nn.Module):
    """Small CNN: 3x3 convolution -> ReLU -> 3x3 max-pool -> linear classifier.

    Args:
        no_of_channels: number of output channels of the convolution. When
            ``None`` (default) the value is taken from
            ``h_p.model_config.no_of_channels``.
        num_classes: number of output logits. When ``None`` (default) it is
            ``len(h_p.model_config.classes)``.

    The linear layer is sized for an 8x8 feature map per channel, which is
    what a 1x28x28 input yields (28 -> 26 after the un-padded 3x3
    convolution, 26 -> 8 after pooling with window 3).
    """

    def __init__(self, no_of_channels=None, num_classes=None):
        super(Net, self).__init__()
        # Fall back to the notebook-wide hyperparameters so ``Net()`` keeps
        # working exactly as before.
        if no_of_channels is None:
            no_of_channels = h_p.model_config.no_of_channels
        if num_classes is None:
            num_classes = len(h_p.model_config.classes)

        # Conv2d(in_channels, out_channels, kernel_size); stride defaults to 1
        self.conv1 = nn.Conv2d(1, no_of_channels, 3)

        self.fc2 = nn.Linear(8 * 8 * no_of_channels, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class logits for a batch of images."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 3)
        # Flatten everything except the batch dimension. The width is derived
        # from the tensor itself instead of a hard-coded 8*8*10, so it always
        # agrees with the channel count used to build conv1 / fc2.
        x = x.view(x.size(0), -1)
        x = self.fc2(x)
        return x  # raw logits; softmax is applied by the caller when needed
      
# Instantiate the CNN defined above and print its layer summary.
model = Net()
print(model)

Net(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (fc2): Linear(in_features=640, out_features=10, bias=True)
)


In [7]:
# Number of scalar entries across every parameter tensor of the model ...
pytorch_total_params = sum(weight.numel() for weight in model.parameters())
print("Total_params",pytorch_total_params)

# ... and across only those tensors that are updated during training.
pytorch_total_params = sum(
    weight.numel()
    for weight in model.parameters()
    if weight.requires_grad
)
print("Trainable_params",pytorch_total_params)

Total_params 6510
Trainable_params 6510


# Report Initialization

In [8]:
# Pull one batch from the training loader to inspect its shape; `data` is
# reused in the next cell as the sample input passed to the report.
data , label = next(iter(train_loader))
print(f"The first input batch shape {data.shape}")

The first input batch shape torch.Size([64, 1, 28, 28])


In [9]:
# Create the reporting helper and give it the class names.
report = Report(classes=h_p.model_config.classes) # initialize the class
# Pass the model together with a sample input batch; the call returns the
# Report object itself (see the cell output).
report.plot_model(model, data) # plot model

<src.report.Report at 0x7f0a4cbc6208>

# Criterion and Optimizer

In [10]:
# Cross-entropy loss, fed with the raw logits that Net.forward returns.
criterion = nn.CrossEntropyLoss()
# Plain SGD over all model parameters with the configured learning rate.
optimizer = optim.SGD(model.parameters(), lr=h_p.training_config.lr)

# Training Method

In [11]:
def train_an_epoch():
    """Run one optimisation pass over ``train_loader``.

    For every batch the model is evaluated, the loss is back-propagated and
    the optimizer takes a single step. The batch loss and the soft-maxed
    predictions are also handed to ``report``.

    Returns:
        float: sum over all batches of ``batch loss * batch size``. The
        caller divides it by the number of samples to get the epoch mean.
    """
    model.train()

    # Passed to the report on every iteration; per the original note the
    # intent is to plot weight / bias / gradient histograms twice per epoch.
    histogram_interval = len(train_loader)/2
    running_loss = 0.0

    for inputs, labels in train_loader:
        n_samples = inputs.size(0)

        # start from clean gradients for this step
        optimizer.zero_grad()

        # forward pass -> loss -> backward pass -> parameter update
        logits = model(inputs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # weight the batch loss by the number of samples it covers
        running_loss += loss.item()*n_samples

        # the report receives probabilities rather than raw logits
        probabilities = F.softmax(logits, dim=-1)

        report.write_a_batch(loss=loss,
                             batch_size=n_samples,
                             actual=labels,
                             prediction=probabilities,
                             train=True)

        report.plot_model_data_grad(at_which_iter=histogram_interval)

    return running_loss

# Validation Method

In [12]:
def valid_an_epoch():
    """Evaluate the model once over ``valid_loader`` without updating it.

    Each batch loss and the soft-maxed predictions are handed to ``report``
    with ``train=False``.

    Returns:
        float: sum over all batches of ``batch loss * batch size``. The
        caller divides it by the number of samples to get the epoch mean.
    """
    valid_loss = 0.0
    model.eval()

    # No parameter update happens here, so autograd bookkeeping is switched
    # off: this avoids building a graph for every validation batch.
    with torch.no_grad():
        for data, target in valid_loader:
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # accumulate the loss weighted by the batch size
            valid_loss += loss.item()*data.size(0)

            # converting raw logits to softmax output
            output = F.softmax(output, dim=-1)

            # write validation batch information into report
            report.write_a_batch(loss=loss,
                                 batch_size=data.size(0),
                                 actual=target,
                                 prediction=output,
                                 train=False)
    return valid_loss

# Training and Plotting

In [13]:
# Epoch numbers at which the report is asked for its more detailed plots.
# NOTE(review): how Report interprets `at_which_epoch` is not visible here;
# the values are passed exactly as before (true division, i.e. floats).
total_epochs = h_p.training_config.no_epochs
halfway_epoch = total_epochs/2

for epoch in range(1, total_epochs+1):

    # Each helper returns a loss summed over samples; dividing by the number
    # of samples the loader draws gives the per-sample mean for the epoch.
    train_loss = train_an_epoch() / len(train_loader.sampler)
    valid_loss = valid_an_epoch() / len(valid_loader.sampler)

    # print training/validation statistics
    print(f'Epoch: {epoch} \tTraining Loss: {train_loss:.6f} \tValidation Loss: {valid_loss:.6f}')

    # Every plotting call returns the report, so the calls can be chained.
    (
        report.plot_an_epoch(detail=True)
        .plot_loss()
        .plot_precision_recall()
        .plot_mcc()
        .plot_confusion_matrix(at_which_epoch=halfway_epoch)
        .plot_missclassification_count(at_which_epoch=halfway_epoch)
        .plot_pred_prob(at_which_epoch=total_epochs)
    )

Epoch: 1 	Training Loss: 0.439560 	Validation Loss: 0.256226
Epoch: 2 	Training Loss: 0.238404 	Validation Loss: 0.185166
Epoch: 3 	Training Loss: 0.185101 	Validation Loss: 0.149065
Epoch: 4 	Training Loss: 0.155258 	Validation Loss: 0.126869
Epoch: 5 	Training Loss: 0.137192 	Validation Loss: 0.131225
Epoch: 6 	Training Loss: 0.124772 	Validation Loss: 0.105704
Epoch: 7 	Training Loss: 0.114972 	Validation Loss: 0.101094
Epoch: 8 	Training Loss: 0.107882 	Validation Loss: 0.097820
Epoch: 9 	Training Loss: 0.102315 	Validation Loss: 0.092370
Epoch: 10 	Training Loss: 0.097554 	Validation Loss: 0.090351


# Plot Hyperparams

In [14]:
# Hand the hyperparameter object to the report after training has finished;
# the call returns the Report object itself (see the cell output).
report.plot_hparams(h_p) # record hyper params once training is completed

<src.report.Report at 0x7f0a4cbc6208>

# Last Epoch Statistics

In [15]:
report.loss_count # mean train / valid loss of the last (10th) epoch; matches the values printed by the training loop

{'train': 0.09755439987679322, 'valid': 0.09035115723609924}

In [16]:
report.act_pred_dict # per split ('train' / 'valid'): 'actual' labels and 'pred' class probabilities for the 10th epoch

{'train': {'actual': array([2, 2, 3, ..., 3, 5, 8]),
  'pred': array([[2.48052995e-04, 2.56126282e-06, 9.99349177e-01, ...,
          1.75650166e-06, 7.01895033e-07, 1.77736339e-07],
         [9.09831726e-07, 4.25156759e-06, 9.97760773e-01, ...,
          2.16383650e-03, 3.05638241e-05, 1.48316531e-06],
         [1.25071892e-05, 6.65944419e-04, 1.18769694e-03, ...,
          8.64696403e-07, 2.03786956e-04, 1.00202044e-04],
         ...,
         [6.38893098e-08, 7.92739058e-07, 1.20110897e-04, ...,
          3.35105483e-07, 2.92154786e-04, 2.76299164e-04],
         [5.92773741e-09, 1.71622860e-09, 3.84681016e-06, ...,
          3.61187311e-08, 1.76191481e-03, 1.02757906e-04],
         [4.52033646e-06, 1.93666483e-06, 5.22399414e-03, ...,
          2.39940956e-08, 3.00623268e-01, 7.03178375e-05]], dtype=float32)},
 'valid': {'actual': array([5, 7, 2, ..., 9, 7, 2]),
  'pred': array([[1.3033445e-05, 1.3001286e-04, 3.8345825e-04, ..., 1.7812144e-07,
          7.1492861e-04, 8.7654627e-07]

In [17]:
report.counter # number of epochs recorded so far (10 after the training loop)

10

In [18]:
report.iter_count # batches written per split, accumulated over all epochs (938 * 10 train, 157 * 10 valid)

Counter({'train': 9380, 'valid': 1570})

In [19]:
report.data_count # data point count per split; the output equals the dataset sizes, so it is not accumulated across epochs

Counter({'train': 60000, 'valid': 10000})

In [20]:
report.mcc # Matthews correlation coefficient per split for the 10th epoch

Counter({'train': 0.9679713295792429, 'valid': 0.9693575850914065})

In [21]:
# Finish reporting; nothing is written through `report` after this cell.
report.close() # close the writer object.