## PyToch Deep Learning Basics

In [1]:
import torch

In [2]:
# Define a Tensor
x = torch.Tensor(2,3)

The above line creates a tensor of shape (2,3) => (rows,cols) filled with zero float values.

In [3]:
x

tensor([[                             0., 158456325028528675187087900672.,
                                      0.],
        [158456325028528675187087900672.,                      223346720.,
                              223692192.]])

In [4]:
# Tensors filled with random values
y = torch.rand(2,3)

In [5]:
y

tensor([[0.7468, 0.5394, 0.6498],
        [0.3254, 0.7638, 0.5421]])

In [6]:
# Multiplication and addition of tensors
x1 = torch.ones(2,3)
y1 = torch.ones(2,3) * 9

In [7]:
x1 + y1

tensor([[10., 10., 10.],
        [10., 10., 10.]])

In [8]:
x1

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [9]:
y1

tensor([[9., 9., 9.],
        [9., 9., 9.]])

In [10]:
# Numpy Slice Functionality is also available
y1[:,1] = y1[:,1] + 1

In [11]:
y1

tensor([[ 9., 10.,  9.],
        [ 9., 10.,  9.]])

### Autograd in PyTorch

In [12]:
# Import Dependencies
from torch.autograd import Variable

In [13]:
# Define a Variable from a Tensor
x = Variable(torch.ones(2,2) * 3, requires_grad=True)

In [14]:
x

tensor([[3., 3.],
        [3., 3.]], requires_grad=True)

In the above code line, we define a Variable using PyTorch. We give it the tensor of (2,2) Ones and multiply by 3. Then we set the "requires_grad" flag to True. Setting this flag to true means that this Variable is trainable and when we do a backpropagation on this Variable, we'll see the gradients being updated and the change in weights.

In [15]:
# New Variable using Previous Variable
z = 2 * (x * x) + 9 * x

In [16]:
z

tensor([[45., 45.],
        [45., 45.]], grad_fn=<ThAddBackward>)

z = 2 x^2 + 9x

dz/dx = 4x + 9

For, x = 3, 

Gradient:   dz/dx = 12 + 9 = 21

In [17]:
# Compute the Gradients using Backpropagation
# The backprop will be computed w.r.t some values, so we provide ones(2,2)
# So, the calculation becomes: d/dx.
z.backward(torch.ones(2,2))

In [18]:
# Print the Gradients of x
x.grad

tensor([[21., 21.],
        [21., 21.]])

The computed gradient values match with the ones we derived above.

### Neural Network in PyTorch

In [19]:
# Import Dependencies
from torch import nn
from torch.nn import functional

In [20]:
# Define the Neural Network Class
class neuralNetwork(nn.Module):
    # Define the Skeleton of our Neural Network
    # Initialize Layers every time
    def __init__(self):
        
        # Overwrite the inherited Function with the Instance of base "nn.Module" Class
        super(neuralNetwork, self).__init__()
        
        # Define the Layers of Neural Network
        self.input_dim = 28*28
        self.h1 = 200
        self.h2 = 200
        self.out = 10
        
        self.fc1 = nn.Linear(self.input_dim, self.h1)
        self.fc2 = nn.Linear(self.h1, self.h2)
        self.fc3 = nn.Linear(self.h2, self.out)
        
    # Overwrite the Dummy "Forward" method in the Base Class "nn.Model"
    # Function to do the Forward Pass in the Neural Network
    # x: Input Data
    def forward(self,x):
        # First Layer with ReLU Activation
        x = functional.relu(self.fc1(x))
        # Update x with second Layer Values
        x = functional.relu(self.fc2(x))
        # Output Layer with Softmax Activation for 10 Ouput Classes
        x = self.fc3(x)
        return functional.log_softmax(x)

In [21]:
# Create an Instance of the Network
model = neuralNetwork()

In [22]:
# Print the Model Summary
model

neuralNetwork(
  (fc1): Linear(in_features=784, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=200, bias=True)
  (fc3): Linear(in_features=200, out_features=10, bias=True)
)

In [23]:
# Train the Model
from torch import optim

In [24]:
# Learning Rate
learning_rate = 0.01

In [25]:
# Define Optimizer: Stochastic Gradient Descent
# model.parameters(): Passes all the parameters of network to optimizer
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# Define Loss Function: Negative Log Likelihood
loss = nn.NLLLoss()

In [26]:
# Load Dataset
from torchvision import datasets, transforms

In [27]:
# Batch Size
batch_size = 200

# Epochs
epochs = 10

In [28]:
# Load Training Dataset
train_loader = torch.utils.data.DataLoader(datasets.MNIST('./dataset/', train=True, download=True,
                                           transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
                                           batch_size=batch_size, shuffle=True)
    
# Load Test Data
test_loader = torch.utils.data.DataLoader(datasets.MNIST('./dataset/', train=False, 
                                                         transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
                                          batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [29]:
# Train the Model
for epoch in range(epochs):
    for batch_idx, (data, target) in enumerate(train_loader):
        # Convert MNIST Images [data] and Labels [target] into PyTorch Variables
        data, target = Variable(data), Variable(target)
        # Flatten out / Reshape the data from (batch_size, 1, 28, 28) to (batch_size, 28*28)
        data = data.view(-1,28*28)
        # Initialize all Gradients with Zeros before running so that it is ready for the next backpropagation pass
        optimizer.zero_grad()
        # Get the Softmax Output from the Neural Network Class Forward() function
        # data: batch of input data
        output = model(data)
        # Calculate the Loss at Output [Negative Log Likelihood Loss]
        # Output: Output of the Model, Target: Original Label
        loss_val = loss(output, target)
        # Backpropagate the Loss throughout the Neural Network
        loss_val.backward()
        # Execute Gradient Descent Step based on the Gradients Calculated above
        optimizer.step()
        
        # Print Training Results after Every 10 Iterations
        if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                           100. * batch_idx / len(train_loader), loss_val.data[0]))







In the lines above, we run through the code using a nested for loop. On one hand the outer for loop takes all the images and goes through the training process for each epoch, the inner for loop loads the data and runs through the entire training set in batch_sizes that we have defined above.

In [30]:
# Test the Trained Model
test_loss = 0
correct_predictions = 0

In [31]:
# Loop through the Test Data and Get the Trained Model Accuracy
for data, target in test_loader:
    data, target = Variable(data, volatile=True), Variable(target)
    data = data.view(-1,28*28)
    output = model(data)
    # Batch Loss
    test_loss += loss(output, target).data[0]
    # Index of Max Log Probability
    # .max(): returns index of max value in a certain dimension of tensor
    # That index represents the digit label [0,1,2,3,4,5,6,7,8,9]
    # .max(1): returns max value in 2nd dimension
    # .max(0): returns both max value and it's index value
    pred = output.data.max(1)[1]
    # Get Number of Correct Predictions
    correct_predictions += pred.eq(target.data).sum()

  This is separate from the ipykernel package so we can avoid doing imports until
  import sys


In [32]:
# Average Loss
test_loss /= len(test_loader.dataset)

print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct_predictions, len(test_loader.dataset),
        100. * correct_predictions / len(test_loader.dataset)))


Test set: Average loss: 0.0003, Accuracy: 9784/10000 (97%)



So, we see that this simple model gets us to an accuracy of 98% for the MNIST dataset.