### *Objective function from Information Theory perspective*

###### Data loading from MNIST dataset

In [80]:
import torchvision.transforms as transforms
import torchvision.datasets as dataset
import torchvision

train_dataset = dataset.MNIST(root = './data', train = True, transform = transforms.ToTensor(), download = True )
test_dataset = dataset.MNIST(root = './data', train = False, transform = transforms.ToTensor())

###### Define dataloaders 

In [81]:
from torch.utils.data import DataLoader

batch_size = 100
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

###### Network model 


In [82]:
import torch.nn.functional as fun
import torch.nn as nn
class DeepNN(nn.Module):
    def __init__(self, input_dim, nh1, nh2, nh3, nh4, nh5, output_dim):
        super(DeepNN, self).__init__()
        #hyperparameter setting 
        self.input_dim = input_dim
        self.nh1, self.nh2, self.nh3, self.nh4, self.nh5, = nh1, nh2, nh3, nh4, nh5
        self.output_dim = output_dim
        #layer definition 
        self.input_layer = nn.Linear(self.input_dim, self.nh1)
        self.hlayer1 = nn.Linear(self.nh1, self.nh2)
        self.hlayer2 = nn.Linear(self.nh2, self.nh3)
        self.hlayer3 = nn.Linear(self.nh3, self.nh4)
        self.hlayer4 = nn.Linear(self.nh4, self.nh5)
        self.output_layer = nn.Linear(self.nh5, self.output_dim)
        
    def forward(self, x):
        #propogation of each layer
        self.out1 = fun.relu(self.input_layer(x))
        self.out2 = fun.relu(self.hlayer1(self.out1))
        self.out3 = fun.relu(self.hlayer2(self.out2))
        self.out4 = fun.relu(self.hlayer3(self.out3))
        self.out5 = fun.relu(self.hlayer4(self.out4))
        self.out6 = fun.relu(self.output_layer(self.out5))
        return fun.log_softmax(self.out6, dim = 1)


In [83]:
obj = DeepNN(784, 1000, 1200, 1100, 1000, 100, 10)

In [None]:
#obj.cuda()          #if GPU available enable CUDA extension

###### Define Loss criterion and optimization method

In [None]:
import torch
criterion = nn.CrossEntropyLoss
optimizer = torch.optim.Adam(obj.parameters(), lr = 0.001)

###### Train Network 

In [None]:
from torch.autograd import Variable
epochs = 30
train_loss = []
for epoch in range(epochs):
    loss_monitor = []
    for i,(images, labels) in enumerate(train_loader):
        images = images.reshape(-1, 28*28)
        optimizer.zero_grad()
        pred = obj(images)
        loss = fun.nll_loss(pred, labels)
        #loss = criterion(pred, labels)
        loss.backward()
        optimizer.step()
        if (i+1) % 100 == 0:                              
            loss_monitor.append(loss.item())
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'%(epoch+1, epochs, i+1, len(train_dataset)//batch_size, loss.item()))
    print('Epoch %d/%d Loss: %.5f'%(epoch+1,epochs,sum(loss_monitor)/len(loss_monitor)))
    train_loss.append(sum(loss_monitor)/len(loss_monitor))
    print(" --------------------- ")

Epoch [1/30], Step [100/600], Loss: 1.0506
Epoch [1/30], Step [200/600], Loss: 0.8101
Epoch [1/30], Step [300/600], Loss: 0.8304
Epoch [1/30], Step [400/600], Loss: 0.6280
Epoch [1/30], Step [500/600], Loss: 0.6061
Epoch [1/30], Step [600/600], Loss: 0.5799
Epoch 1/30 Loss: 0.75086
 --------------------- 
Epoch [2/30], Step [100/600], Loss: 0.5464
Epoch [2/30], Step [200/600], Loss: 0.5816
Epoch [2/30], Step [300/600], Loss: 0.6981
Epoch [2/30], Step [400/600], Loss: 0.6173
Epoch [2/30], Step [500/600], Loss: 0.6299
Epoch [2/30], Step [600/600], Loss: 0.6375
Epoch 2/30 Loss: 0.61846
 --------------------- 
Epoch [3/30], Step [100/600], Loss: 0.3992
Epoch [3/30], Step [200/600], Loss: 0.6102
Epoch [3/30], Step [300/600], Loss: 0.5729
Epoch [3/30], Step [400/600], Loss: 0.4070
Epoch [3/30], Step [500/600], Loss: 0.4510
Epoch [3/30], Step [600/600], Loss: 0.4453
Epoch 3/30 Loss: 0.48094
 --------------------- 
Epoch [4/30], Step [100/600], Loss: 0.4756
Epoch [4/30], Step [200/600], Loss: 

In [None]:
import matplotlib.pyplot as plt
figure = plt.figure()
plt.plot(range(epochs), train_loss, color = 'blue')
plt.legend([train_loss], loc = 'upper right')
plt.xlabel('Epochs')
plt.ylabel('Training Loss')
figure