In [1]:
%matplotlib inline
%config InlineBackend.figure_format ='retina'
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torchvision.transforms as transforms
import torchvision.datasets as dsets


torch.manual_seed(13)

<torch._C.Generator at 0x1a18c65cf0>

# Deep Neural Networks

Use `nn.ModuleList` to build Nets with arbitrary number of hidden layers

In [6]:
import torch.nn as nn


class Net(nn.Module):
    def __init__(self, dims):
        super(Net, self).__init__()
        
        self.hidden = nn.ModuleList()
        for in_dim, out_dim in zip(dims, dims[1:]):
            self.hidden.append(nn.Linear(in_dim, out_dim))
            
    def forward(self, x):
        L = len(self.hidden)
        
        z = x
        for i in range(L):
            z = self.hidden[i](z)
            if i < L - 1:
                z = torch.relu(z)
        return z

Apply model to MNIST dataset from previous week.

In [3]:
from torch.utils.data import Dataset, DataLoader


train_dataset = dsets.MNIST(root='../../week_4/data', train=True, download=False, transform=transforms.ToTensor())
validation_dataset = dsets.MNIST(root='../../week_4/data', train=False, download=False, transform=transforms.ToTensor())

# Data
train_dataloader = DataLoader(dataset=train_dataset, batch_size=100)
validation_loader = DataLoader(dataset=validation_dataset, batch_size=100)

In [4]:
def train(train_dataloader, validation_loader, model, loss_f, optimizer, num_epoch=10):
    N_test = len(validation_loader)

    # Training Loop
    losses = []
    accuracy = []
    for epoch in range(num_epoch):
        total_loss = 0
        for x, y in train_dataloader:
            y_hat = model(x.view(-1, 28 * 28))
            loss = loss_f(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        losses.append(total_loss)

        correct = 0
        for x_test, y_test in validation_loader:
            z = model(x_test.view(-1, 28 * 28))
            _, yhat = torch.max(z.data, 1)
            correct += (yhat == y_test).sum().item()

        accuracy.append(correct / N_test)
    return losses, accuracy

In [7]:
from torch.nn import CrossEntropyLoss


# Model
model = Net([28 * 28, 100, 100, 10])

# Loss
loss_f = CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

losses, accuracy = train(train_dataloader, validation_loader, model, loss_f, optimizer, num_epoch=10)

In [8]:
accuracy[-1]

97.38

## Dropout

In [9]:
class Net(nn.Module):
    
    # Constructor
    def __init__(self, in_size, n_hidden, out_size, p=0):
        super(Net, self).__init__()
        self.drop = nn.Dropout(p=p)
        self.linear1 = nn.Linear(in_size, n_hidden)
        self.linear2 = nn.Linear(n_hidden, n_hidden)
        self.linear3 = nn.Linear(n_hidden, out_size)
    
    # Prediction function
    def forward(self, x):
        x = self.linear1(x)
        x = self.drop(x)
        x = torch.relu(x)
        
        x = self.linear2(x)
        x = self.drop(x)
        x = torch.relu(x)
        
        x = self.linear3(x)
        return x

In [10]:
model = Net(28 * 28, 100, 10)
model_drop = Net(28 * 28, 100, 10, p=0.01)

**First** We need to set our model in the training mode in order to activate dropout

In [11]:
model_drop.train()

Net(
  (drop): Dropout(p=0.01)
  (linear1): Linear(in_features=784, out_features=100, bias=True)
  (linear2): Linear(in_features=100, out_features=100, bias=True)
  (linear3): Linear(in_features=100, out_features=10, bias=True)
)

In [12]:
# With dropout
optimizer = torch.optim.SGD(model_drop.parameters(), lr=0.1)
_, accuracy = train(train_dataloader, validation_loader, model_drop, loss_f, optimizer, num_epoch=10)
accuracy[-1]

97.1

In [13]:
# Without dropout
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
_, accuracy = train(train_dataloader, validation_loader, model, loss_f, optimizer, num_epoch=10)
accuracy[-1]

97.3

**Second** For evaluation we need to turnoff dropout mechanism

In [14]:
model_drop.eval()

Net(
  (drop): Dropout(p=0.01)
  (linear1): Linear(in_features=784, out_features=100, bias=True)
  (linear2): Linear(in_features=100, out_features=100, bias=True)
  (linear3): Linear(in_features=100, out_features=10, bias=True)
)

## Weights Initialization 

Example `torch.nn.init.xavier_uniform_(linear.weight)`

In [17]:
class Net_Xavier(nn.Module):
    
    def __init__(self, dims):
        super(Net, self).__init__()

        self.hidden = nn.ModuleList()
        for in_dim, out_dim in zip(dims, dims[1:]):
            linear = nn.Linear(in_dim, out_dim)
            # !!! Weight initializer
            torch.nn.init.xavier_uniform_(linear.weight)
            self.hidden.append(linear)
            
    def forward(self, x):
        L = len(self.hidden)
        
        z = x
        for i in range(L):
            z = self.hidden[i](z)
            if i < L - 1:
                z = torch.relu(z)
        return z

## Gradient Descent With Momentum


In PyTorch: Set `momentum` parameter in `torch.optim.SGD` to non-zero value