In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.utils.data as data

## Digit Classifier

#### Dataset -> MNIST
#### Framework -> Pytorch, Keras, Tensorflow

- Compute Loss
- Backpropagate and calculate gradients
- Update parameters (Optimizer)
- Specify epochs, batch_size
- Evaluate the model with testset

## Pytorch Example

In [2]:
torch.__version__

'0.4.1'

### Neural Net class

In [3]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(Net, self).__init__()
        self.conv2d = nn.Conv2d(1,10,kernel_size=3,stride=1, padding=1)
        #self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size*10, num_classes) # hid*10->10 is num_channels
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax()
        self.relu = nn.ReLU()
        self.BatchNorm2d = nn.BatchNorm2d(10)  # Arg-> num_Channels
        self.MaxPool2d = nn.MaxPool2d(kernel_size=3,stride=1)
        
    def forward(self,x):
        #x = self.fc1(x)
        #x = self.sigmoid(x)
        #x = self.softmax(x)
        x = self.conv2d(x)
        x = self.BatchNorm2d(x)
        x = self.relu(x)
        x = x.reshape(x.size(0),-1)
        x = self.fc2(x)
        return x

### MNIST DATASET

In [4]:
train_data = torchvision.datasets.MNIST(root = './data',train=True,download=True,transform=transforms.ToTensor())

In [5]:
test_data = torchvision.datasets.MNIST(root = './data',train=False,download=True,transform=transforms.ToTensor())

### Model

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
input_size = 500
hidden_size = 784 #28*28
num_classes = 10

In [8]:
model = Net(input_size, hidden_size, num_classes).to(device)

In [9]:
#loss function
los = nn.CrossEntropyLoss()

#optimizer
#optimizer = optim.SGD(model.parameters(), lr= 0.01)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
# Data-loaders
train_data = data.DataLoader(dataset=train_data,
                            batch_size = 100,
                            shuffle = True)
test_data = data.DataLoader(dataset= test_data,
                           batch_size = 100,
                           shuffle = False)

In [11]:
epochs = 3
total_step = len(train_data)

#Taining the model
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_data):
        #Moving tensors to device
        #images = images.reshape(-1,28*28).to(device)
        images = images.reshape(-1,1,28,28).to(device)
        labels = labels.to(device)
        
        #Forward prop
        out = model(images)
        loss = los(out, labels)
        
        #Backward Prop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1)%600 == 0:
            print ('Epoch: [{}/{}], Step: [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, epochs, i+1, total_step, loss.item()))


Epoch: [1/3], Step: [600/600], Loss: 0.1218
Epoch: [2/3], Step: [600/600], Loss: 0.2984
Epoch: [3/3], Step: [600/600], Loss: 0.0306


In [12]:
# Evaluating the model
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_data:
        images = images.reshape(-1,1,28,28).to(device)
        labels = labels.to(device)
        output = model(images)
        # Here the torch.max is used to choose the max probability of
        # the given no. of classes
        _, predicted = torch.max(output.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        # Here the .item() is to convert the computed tensor into scalar
    
    print('Accuracy on test images: {} %'.format(100*correct/total))

Accuracy on test images: 98.0 %
