In this tutorial, we will create a deep learning model that classifies handwritten digits. We will use the MNIST dataset included in the torchvision package.

The mandatory first step is basic data pre-processing. Using a utility called `transforms`, which comes from the
torchvision package, we will perform the two basic preprocessing operations listed below.

- Transform the raw dataset into tensors.
- Normalize the dataset.

We will also import the dataset from the torchvision package.

In [94]:
import torch
from torchvision.datasets import MNIST

In [95]:
print('test')

from torchvision import transforms

# Preprocessing applied to every image when it is read from the dataset.
# The mean/std tuples hold a single value each because the batches used in
# this notebook have exactly one channel.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize(mean=(0.5,), std=(0.5,))

# Compose runs the steps in the order given: tensor conversion, then scaling.
transform = transforms.Compose([to_tensor, normalize])

test


In [96]:
# Build the MNIST training and test datasets. Both are stored under the
# 'data' directory, are requested with download=True and share the same
# preprocessing pipeline; only the `train` flag differs.
dataset_options = dict(root='data', download=True, transform=transform)
train_data = MNIST(train=True, **dataset_options)
test_data = MNIST(train=False, **dataset_options)

In [97]:
# Number of samples in the training and test datasets, as a (train, test) pair
len(train_data) , len(test_data)

(60000, 10000)

In [98]:
# number of samples each data loader returns per batch
batch_size = 50
# fraction (0-1, not a percentage) of the training set held out for validation
valid_size = 0.2

In [99]:
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader

# Part of the training set is held out for validation: shuffle every training
# index once, then cut the shuffled list at the validation boundary.
num_train = len(train_data)
ix = list(range(num_train))
np.random.shuffle(ix)
split = int(np.floor(valid_size * num_train))
valid_idx = ix[:split]   # first `split` shuffled indices -> validation
train_idx = ix[split:]   # everything after the cut -> training

# One sampler per index list, so the two subsets never share a sample.
train_sampler, valid_sampler = (
    SubsetRandomSampler(subset) for subset in (train_idx, valid_idx)
)

# The training and validation loaders both read from train_data and differ
# only in their sampler; the test loader walks test_data without a sampler.
train_loader, valid_loader = (
    DataLoader(train_data, batch_size=batch_size, sampler=sampler)
    for sampler in (train_sampler, valid_sampler)
)
test_loader = DataLoader(test_data, batch_size=batch_size)

In [100]:
# Sanity check: fetch one batch from the training loader, print the shapes of
# its input and target tensors, and stop after that first batch.
for batch_inputs, batch_targets in train_loader:
    print(batch_inputs.shape)
    print(batch_targets.shape)
    break

torch.Size([50, 1, 28, 28])
torch.Size([50])


### Custom Weight Initialization:
Below we will use Kaiming (He) normal initialization in the linear layers.

In [101]:
from torch import nn, optim
import torch.nn.functional as F

class Model(nn.Module):
    """Fully connected classifier with layer sizes 784-512-256-128-56-10.

    Every linear layer's weight matrix is re-initialised with
    ``nn.init.kaiming_normal_``; the biases keep the initialisation that
    ``nn.Linear`` gives them. The four hidden layers are followed by ReLU and
    the output layer returns raw scores. The network contains no dropout.
    """

    def __init__(self):
        super().__init__()
        # Each layer is created and initialised before the next one is built,
        # so the order in which random numbers are drawn is fc1, fc2, ... fc5.
        self.fc1 = self._he_linear(784, 512)
        self.fc2 = self._he_linear(512, 256)
        self.fc3 = self._he_linear(256, 128)
        self.fc4 = self._he_linear(128, 56)
        self.fc5 = self._he_linear(56, 10)

    @staticmethod
    def _he_linear(in_features, out_features):
        """Return an ``nn.Linear`` whose weight uses Kaiming-normal init."""
        layer = nn.Linear(in_features, out_features)
        nn.init.kaiming_normal_(layer.weight)
        return layer

    def forward(self, x):
        """Return the 10 output scores for each sample in ``x``.

        Args:
            x: batch of inputs whose non-batch dimensions multiply to 784
               (for example ``(N, 1, 28, 28)``).

        Returns:
            Tensor of shape ``(N, 10)`` with unnormalised scores.
        """
        # Collapse everything after the batch dimension. Unlike
        # view(x.shape[0], -1), flatten also accepts non-contiguous tensors
        # and batches with zero samples.
        x = x.flatten(start_dim=1)

        # hidden layers: linear transform followed by ReLU
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(hidden(x))

        # output layer: no activation, raw scores are returned
        return self.fc5(x)

In [102]:
# Instantiate the network; the Kaiming-normal weight initialisation runs inside Model.__init__
model = Model()

In [103]:
from torch import optim

# Loss function: cross-entropy between the model outputs and the target labels.
criterion = nn.CrossEntropyLoss()

# Optimiser: SGD over every parameter of the model with learning rate 0.01.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

In [104]:
num_epochs = 10
for epoch in range(1, num_epochs + 1):  # epochs are numbered 1 through 10
    train_loss = []
    valid_loss = []

    # ---- training pass over the training loader ----
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()                    # drop gradients left from the previous step
        loss = criterion(model(inputs), labels)  # forward pass + loss
        loss.backward()                          # back-propagate
        optimizer.step()                         # update the weights
        train_loss.append(loss.item())

    # ---- validation pass: evaluation mode, gradient tracking disabled ----
    model.eval()
    with torch.no_grad():
        valid_loss.extend(
            criterion(model(inputs), labels).item()
            for inputs, labels in valid_loader
        )

    # report the mean per-batch loss of both passes
    print("Epoch:", epoch, "Training Loss: ", np.mean(train_loss), "Valid Loss: ", np.mean(valid_loss))

Epoch: 1 Training Loss:  0.49139074483731143 Valid Loss:  0.27911676407481234
Epoch: 2 Training Loss:  0.23060801731189712 Valid Loss:  0.21353163515838483
Epoch: 3 Training Loss:  0.17457188801296677 Valid Loss:  0.19162830716619889
Epoch: 4 Training Loss:  0.14238762469807018 Valid Loss:  0.15832537733173618
Epoch: 5 Training Loss:  0.12051112777165447 Valid Loss:  0.14200689528758328
Epoch: 6 Training Loss:  0.10310808430464628 Valid Loss:  0.129124200627363
Epoch: 7 Training Loss:  0.09007644096758062 Valid Loss:  0.12350966302134718
Epoch: 8 Training Loss:  0.07880066299646084 Valid Loss:  0.11955913926164309
Epoch: 9 Training Loss:  0.07011731284510461 Valid Loss:  0.11444609441095963
Epoch: 10 Training Loss:  0.061662113795561406 Valid Loss:  0.10778228100583268


##### Other weight initializations coding examples:

- <b>Normal Distribution</b>:

```python
nn.init.normal_(self.fc1.weight, mean=0, std=1)
```
- <b>Lecun Normal Distribution</b>:

By default, PyTorch uses LeCun initialization, so no code changes are required.

- <b>Kaiming (He) Normal Distribution</b>:

```python
nn.init.kaiming_normal_(self.fc1.weight)
```

### Test the network

In [105]:
# Running totals: the summed test loss plus, for each digit 0-9, how many test
# samples were classified correctly and how many were seen in total.
test_loss = 0.0
class_correct = [0.0] * 10
class_total = [0.0] * 10

model.eval() # prep model for evaluation

# Evaluation needs no gradients, so autograd tracking is switched off here.
with torch.no_grad():
    for data, target in test_loader:
        # forward pass: compute the model outputs for this batch
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # weight the batch loss by the batch size so that dividing by the
        # dataset size below yields a per-sample average
        test_loss += loss.item() * data.size(0)
        # predicted class = index of the largest output score
        _, pred = torch.max(output, 1)
        # Tally per class over the samples actually present in this batch;
        # iterating the targets themselves (instead of range(batch_size))
        # stays correct when a batch holds fewer than batch_size samples.
        hits = pred.eq(target.view_as(pred)).tolist()
        for label, hit in zip(target.tolist(), hits):
            class_correct[label] += hit
            class_total[label] += 1

# calculate and print avg test loss
test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            str(i), 100 * class_correct[i] / class_total[i],
            class_correct[i], class_total[i]))
    else:
        # The class label is the digit itself; no separate `classes` list is
        # defined in this notebook.
        print('Test Accuracy of %5s: N/A (no training examples)' % str(i))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))

Test Loss: 0.092873

Test Accuracy of     0: 98% (970/980)
Test Accuracy of     1: 99% (1126/1135)
Test Accuracy of     2: 95% (985/1032)
Test Accuracy of     3: 97% (980/1010)
Test Accuracy of     4: 96% (951/982)
Test Accuracy of     5: 96% (861/892)
Test Accuracy of     6: 97% (930/958)
Test Accuracy of     7: 95% (983/1028)
Test Accuracy of     8: 97% (951/974)
Test Accuracy of     9: 95% (960/1009)

Test Accuracy (Overall): 96% (9697/10000)
