# General Process of Deep Learning in PyTorch

In [None]:
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as dutils
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from tqdm.notebook import tqdm

## 1. Global variables

In [None]:
import zlib

# Root directory in which torchvision stores / looks up the datasets.
# Alternative location for a personal machine: data_root = '~/Datasets/'
data_root = '/data/ssd/torchvision/'
batch_size = 32          # mini-batch size of the training loader
val_batch_size = 128     # batch size of the validation and test loaders

# os.getlogin() can raise OSError (e.g. when there is no controlling
# terminal); fall back to the USER environment variable in that case.
try:
    username = os.getlogin()
except OSError:
    username = os.environ.get("USER")

# Spread users over at most 4 GPUs, but never pick an index that does not
# exist on this machine.  The built-in hash() is salted per process for
# strings, so it would assign a different GPU after every kernel restart;
# crc32 gives the same user the same GPU every time.
num_gpus = min(4, torch.cuda.device_count()) if torch.cuda.is_available() else 0
gpu_no = zlib.crc32((username or '').encode('utf-8')) % num_gpus if num_gpus else 0

device = torch.device(f'cuda:{gpu_no}' if num_gpus else 'cpu')
print(device)

### 1.1. View storage usage

In [None]:
! df -h

## 2. Data preprocessing

### 2.1. Loading data

References:

`torchvision.datasets`: https://pytorch.org/vision/stable/datasets.html

In [None]:
# Load the MNIST training split from data_root (downloading it if needed)
# without any transform, so that the raw data can be inspected.
trainset = datasets.MNIST(root=data_root, train=True, download=True)

# Dataset summary, a separator line, then shape and type of the image tensor.
print(trainset, '-----', sep='\n')
print(trainset.data.size(), type(trainset.data))


### 2.2. Normalization

References:

`Tensor.float`: https://pytorch.org/docs/stable/generated/torch.Tensor.float.html#torch.Tensor.float

`torch.mean`: https://pytorch.org/docs/stable/generated/torch.mean.html

`torch.std`: https://pytorch.org/docs/stable/generated/torch.std.html

In [None]:
# Dataset-wide statistics, rescaled by 1/255 to match the value range that
# ToTensor produces.  The pixels are cast to float before reducing; the
# direct call without the cast is kept below as the failing counter-example.
# mean = trainset.data.mean() / 255.       # error
pixels = trainset.data.float()
mean = pixels.mean() / 255.
std = pixels.std(unbiased=True) / 255.
del pixels   # do not keep the float copy of the whole dataset alive

print(f'mean = {mean} \nstd  = {std}')

### 2.3. Transforms

References:

`torchvision.transforms`: https://pytorch.org/vision/stable/transforms.html

In [None]:
# Per-image pipeline, applied in order:
#   1. convert to a float tensor (ToTensor also divides by 255),
#   2. rotate by a random angle of at most 5 degrees,
#   3. standardize with the dataset statistics computed above.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(degrees=5),
    transforms.Normalize(mean=mean, std=std),
])

### 2.4. Create dataloaders for training, validation, and testing

In [None]:
# Evaluation pipeline: same tensor conversion and normalization as
# `transform`, but WITHOUT the random rotation.  Random augmentation belongs
# to training only; applying it to validation/test images would make the
# reported accuracy nondeterministic.
eval_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Two views of the same training images: augmented (for training) and
# un-augmented (for validation).
train_full = datasets.MNIST(data_root, train=True, transform=transform)
train_full_eval = datasets.MNIST(data_root, train=True, transform=eval_transform)
test_set = datasets.MNIST(data_root, train=False, transform=eval_transform)

# Random 50000/10000 split.  The validation subset reuses the held-out
# indices on the un-augmented view, so both subsets stay disjoint.
train_set, held_out = dutils.random_split(train_full, [50000, 10000])
val_set = dutils.Subset(train_full_eval, held_out.indices)

# Only the training data is reshuffled every epoch.
train_loader = dutils.DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = dutils.DataLoader(val_set, batch_size=val_batch_size, shuffle=False)
test_loader = dutils.DataLoader(test_set, batch_size=val_batch_size, shuffle=False)


### 2.5. Show sample images

References:

`matplotlib.pyplot`: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.html

In [None]:
def imshow(img):
    """Display a normalized (C, H, W) image tensor with matplotlib.

    The normalization is undone with the notebook-level `mean` and `std`,
    the channel axis is moved last (the layout plt.imshow expects), and the
    axes are hidden.
    """
    restored = img * std + mean
    channels_last = restored.permute(1, 2, 0).numpy()
    plt.imshow(channels_last)
    plt.axis('off')

# Fetch one training batch.  The built-in next() is used because DataLoader
# iterators in current PyTorch releases no longer offer a `.next()` method.
# The batch is named `images`/`labels` so the builtin `input` stays usable.
data_iter = iter(train_loader)
images, labels = next(data_iter)

# Tile the batch into a grid with 8 images per row and display it.
imshow(torchvision.utils.make_grid(images, nrow=8))


## 3. Create a model

### 3.1. Introduction to CNN

See the convolutional-network tutorials from [cs231n](https://cs231n.github.io/convolutional-networks/) and [Theano](http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html) (convolution arithmetic).

References:

https://pytorch.org/docs/stable/nn.html

In [None]:
import math

def get_output_size(in_size, kernel_size, stride=1, padding=0, dilation=1):
    """Return the output length of a convolution/pooling layer along one axis.

    Computes
        floor((in_size + 2*padding - dilation*(kernel_size - 1) - 1) / stride) + 1
    with integer floor division, so the result is exact for arbitrarily
    large integers (no float rounding) and is an int for int arguments.

    Args:
        in_size: input length along the axis.
        kernel_size: kernel length along the axis.
        stride: step between consecutive kernel positions.
        padding: padding added to each side of the input.
        dilation: spacing between kernel elements.

    Returns:
        Output length along the axis.

    >>> get_output_size(28, 3, padding=1)
    28
    >>> get_output_size(28, 2, stride=2)
    14
    """
    # A dilated kernel covers dilation * (kernel_size - 1) + 1 input positions.
    effective_kernel = dilation * (kernel_size - 1) + 1
    return (in_size + 2 * padding - effective_kernel) // stride + 1


In [None]:
class CNN(nn.Module):
    """Small convolutional classifier: single-channel image in, 10 logits out.

    Spatial size through `feature` for a 28x28 input (as used in this
    notebook): 28 -> conv(pad 1) 28 -> pool 14 -> conv 12 -> pool 6
    -> conv 4 -> pool 2, hence 8 * 2 * 2 flattened features.
    """

    def __init__(self):
        super().__init__()  # required before any sub-module is assigned

        # Convolutional feature extractor.  The input channel counts of the
        # 2nd and 3rd convolutions could instead be inferred lazily with
        # nn.LazyConv2d(16, 3) / nn.LazyConv2d(8, 3).
        conv_layers = [
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.feature = nn.Sequential(*conv_layers)

        # Fully connected head; nn.LazyLinear(32) would avoid hard-coding
        # the flattened size 8 * 2 * 2.
        head_layers = [
            nn.Flatten(),
            nn.Linear(in_features=8 * 2 * 2, out_features=32),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=32, out_features=10),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a (B, 1, H, W) batch to (B, 10) unnormalized class scores."""
        return self.classifier(self.feature(x))
    

### 3.2. Create a model object

In [None]:
# Instantiate the network and move its parameters to the selected device.
model = CNN().to(device)

### 3.3. Test the model

In [None]:
# Smoke test: feed a random batch through the network and check that the
# output has one row per sample.
x = torch.rand((16, 1, 28, 28))   # (B, C, H, W)
y = model(x.to(device))           # (B, L)

print(y.shape)

## 4. Loss function

References:

https://pytorch.org/docs/stable/nn.html#loss-functions

In [None]:
# Cross-entropy loss; the training loop applies it to the model's raw
# outputs (logits) and the target labels.
criterion = nn.CrossEntropyLoss()
# Optional, not done here: move the loss module to the device as well.
# criterion = criterion.to(device)

## 5. Optimizer

References:

https://pytorch.org/docs/stable/optim.html

https://github.com/Jaewan-Yun/optimizer-visualization

In [None]:
# SGD with momentum and per-group options: the classifier head overrides the
# learning rate with 1e-3, while the feature extractor falls back to the
# default of 1e-2 given to the constructor.
param_groups = [
    dict(params=model.feature.parameters()),
    dict(params=model.classifier.parameters(), lr=1e-3),
]
optimizer = optim.SGD(param_groups, lr=1e-2, momentum=0.9)


## 6. Training

In [None]:
def _val(dataloader, **kwargs):
    """Compute the classification accuracy of the global `model` on a loader.

    Args:
        dataloader: iterable yielding (inputs, targets) batches.
        **kwargs: forwarded to tqdm (e.g. `desc`, `leave`).

    Returns:
        Fraction of correctly classified samples as a float, or NaN when the
        loader yields no samples.
    """
    # Remember the caller's mode and restore it afterwards, so that a
    # training loop calling this between epochs keeps training in train mode.
    was_training = model.training
    model.eval()

    total = 0
    correct = 0

    try:
        # No gradients are needed for evaluation.
        with torch.no_grad(), tqdm(dataloader, **kwargs) as batches:
            for inputs, targets in batches:
                inputs, targets = inputs.to(device), targets.to(device)

                # The predicted class is the index of the largest logit.
                predictions = model(inputs).argmax(dim=1)

                total += targets.size(0)
                correct += int((predictions == targets).sum())
    finally:
        model.train(was_training)

    if total == 0:
        # Accuracy is undefined without samples; avoid a ZeroDivisionError.
        return float('nan')
    return correct / total


def validate():
    """Return the accuracy on `val_loader`.

    The 'Validating' progress bar is created with leave=False.
    """
    val_accuracy = _val(val_loader, leave=False, desc='Validating')
    return val_accuracy

def test():
    """Return the accuracy on `test_loader`, with a 'Testing' progress bar."""
    test_accuracy = _val(test_loader, desc='Testing')
    return test_accuracy


In [None]:
def train(num_epochs):
    """Train the global `model` for `num_epochs` epochs, validating after each.

    Uses the notebook-level `train_loader`, `criterion`, `optimizer` and
    `device`.  The running loss (averaged over the last 50 steps) is shown on
    the per-epoch progress bar; the mean epoch loss and the validation
    accuracy are shown on the outer progress bar.

    Args:
        num_epochs: number of passes over the training loader.
    """
    log_every = 50   # steps between updates of the running-loss display

    with tqdm(range(1, num_epochs+1), desc='Training') as epochs:
        for epoch in epochs:
            # Re-enter training mode every epoch: the validation pass at the
            # end of the previous epoch switches the model to eval mode.
            model.train()

            train_loss = 0.0
            step = 0   # stays 0 if the loader yields no batch

            with tqdm(train_loader, desc=f'Epoch {epoch}') as batches:
                loss_step = 0.0

                for step, (inputs, targets) in enumerate(batches, 1):
                    inputs, targets = inputs.to(device), targets.to(device)

                    # Clear old gradients.  (Skipping this for several steps
                    # would accumulate gradients, emulating a larger batch.)
                    optimizer.zero_grad()

                    output = model(inputs)
                    loss = criterion(output, targets)
                    loss.backward()      # compute gradients

                    optimizer.step()     # apply the parameter update

                    # Read the scalar once and reuse it for both statistics.
                    loss_value = loss.item()
                    loss_step += loss_value
                    train_loss += loss_value

                    if step % log_every == 0:
                        batches.set_postfix({
                            'steps': step,
                            'loss_step': f'{loss_step/log_every:.3f}'
                        })
                        loss_step = 0.0

                val_acc = validate()
                epochs.set_postfix({
                    # max() guards against an empty training loader.
                    'loss': f'{train_loss/max(step, 1):.3f}',
                    'val_acc': f'{val_acc:.3f}'
                })
        
        

In [None]:
# Run the training loop for 5 epochs (validation runs after every epoch).
train(5)

## 7. Testing

In [None]:
# Measure the accuracy on the test loader and report it.
acc = test()
print(f'Test accuracy: {acc}')


## 8. Shutdown

From the Jupyter home page, open the `Running` tab and shut down this kernel.