In [4]:
import torch
import torchvision
import numpy as np
from matplotlib import pyplot as plt
from torchsummary import summary

%matplotlib inline

In [5]:
# load mnist dataset
transform = torchvision.transforms.ToTensor()
train = torchvision.datasets.MNIST(root='../datasets/', download = True, train= True, transform = transform)
test  = torchvision.datasets.MNIST(root='../datasets/', download = True, train= False, transform = transform)

In [6]:
train_dataloader = torch.utils.data.DataLoader(dataset=train,batch_size=50,shuffle = True)
test_dataloader = torch.utils.data.DataLoader(dataset=test,batch_size=50,shuffle = False)

In [7]:
# shape of each batch
print(next(iter(train_dataloader))[0].shape)
#labels
print(next(iter(train_dataloader))[1].shape)

torch.Size([50, 1, 28, 28])
torch.Size([50])


In [8]:
# show the dataset


In [12]:
# create a neural network
from torch import nn
import torch.nn.functional as F
class MLP_network(nn.Module):
    def __init__(self):
        super(MLP_network, self).__init__()
        # input size
        self.fc1 = nn.Linear(28*28, 500)
        # next layer
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 256)
        self.fc4 = nn.Linear(256, 64)
        # output layer
        self.fc5 = nn.Linear(64, 10)
    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x) # output
        return x




In [13]:
model = MLP_network()
device = 'cuda'
model.to('cuda')

MLP_network(
  (fc1): Linear(in_features=784, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=256, bias=True)
  (fc3): Linear(in_features=256, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=64, bias=True)
  (fc5): Linear(in_features=64, out_features=10, bias=True)
)

In [14]:
# print total learnable parameters
total_params = sum(p.numel() for p in model.parameters() )
print(total_params)
print(" total parameters counted  second layer", (256) * 500 + 256 )
summary(model,(1,28,28))

603646
 total parameters counted  second layer 128256
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                  [-1, 500]         392,500
            Linear-2                  [-1, 256]         128,256
            Linear-3                  [-1, 256]          65,792
            Linear-4                  [-1, 256]          65,792
            Linear-5                   [-1, 64]          16,448
            Linear-6                   [-1, 10]             650
Total params: 669,438
Trainable params: 669,438
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 2.55
Estimated Total Size (MB): 2.57
----------------------------------------------------------------


In [15]:
optimizer = torch.optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss().to(device='cuda')

In [23]:
# loop over the epochs
for  i in range(0, 20):
    avgloss = []
    for batch,(images, labels) in enumerate(train_dataloader):
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        
#         print(outputs[0], labels[0])
#         print((outputs[0]))
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(np.mean(avgloss))
        # testing
    correct_cnt, ave_loss = 0, 0
    total_cnt = 0
    for batch_idx, (x, target) in enumerate(test_dataloader):
        if device:
            x, target = x.cuda(), target.cuda()
    #     x, target = Variable(x, volatile=True), Variable(target, volatile=True)
        out = model(x)
        loss = criterion(out, target)
        _, pred_label = torch.max(out.data, 1)
        total_cnt += x.data.size()[0]
        correct_cnt += (pred_label == target.data).sum()
        # smooth average
    #     ave_loss = ave_loss * 0.9 + loss.data[0] * 0.1

        if(batch_idx+1) % 100 == 0 or (batch_idx+1) == len(test_dataloader):
            print( '==>>>  batch index: {}, acc: {:.3f}'.format(
                 batch_idx+1, correct_cnt * 1.0 / total_cnt))

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


nan
==>>>  batch index: 100, acc: 0.975
==>>>  batch index: 200, acc: 0.982
nan
==>>>  batch index: 100, acc: 0.970
==>>>  batch index: 200, acc: 0.979
nan
==>>>  batch index: 100, acc: 0.971
==>>>  batch index: 200, acc: 0.980
nan
==>>>  batch index: 100, acc: 0.973
==>>>  batch index: 200, acc: 0.982
nan
==>>>  batch index: 100, acc: 0.972
==>>>  batch index: 200, acc: 0.979
nan
==>>>  batch index: 100, acc: 0.975
==>>>  batch index: 200, acc: 0.983
nan
==>>>  batch index: 100, acc: 0.977
==>>>  batch index: 200, acc: 0.984
nan
==>>>  batch index: 100, acc: 0.977
==>>>  batch index: 200, acc: 0.984
nan
==>>>  batch index: 100, acc: 0.978
==>>>  batch index: 200, acc: 0.984
nan
==>>>  batch index: 100, acc: 0.980
==>>>  batch index: 200, acc: 0.986
nan
==>>>  batch index: 100, acc: 0.979
==>>>  batch index: 200, acc: 0.985
nan
==>>>  batch index: 100, acc: 0.979
==>>>  batch index: 200, acc: 0.984
nan
==>>>  batch index: 100, acc: 0.979
==>>>  batch index: 200, acc: 0.984
nan
==>>>  b

In [None]:
# testing
correct_cnt, ave_loss = 0, 0
total_cnt = 0
for batch_idx, (x, target) in enumerate(test_dataloader):
    if device:
        x, target = x.cuda(), target.cuda()
#     x, target = Variable(x, volatile=True), Variable(target, volatile=True)
    out = model(x)
    loss = criterion(out, target)
    _, pred_label = torch.max(out.data, 1)
    total_cnt += x.data.size()[0]
    correct_cnt += (pred_label == target.data).sum()
    # smooth average
#     ave_loss = ave_loss * 0.9 + loss.data[0] * 0.1

    if(batch_idx+1) % 100 == 0 or (batch_idx+1) == len(test_dataloader):
        print( '==>>>  batch index: {}, acc: {:.3f}'.format(
             batch_idx+1, correct_cnt * 1.0 / total_cnt))

In [None]:
model3