# Mixed Precision on 5-layer CNN 

- Follows the [NVIDIA webinar on mixed precision with PyTorch](https://info.nvidia.com/webinar-mixed-precision-with-pytorch-reg-page.html).
- Import PyTorch and the [NVIDIA Apex AMP](https://github.com/NVIDIA/apex) library.

In [None]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from apex import amp

### GPU Usage
Check whether you are using a GPU with the Volta or Turing architecture.

In [None]:
# Print the name of the GPU PyTorch is currently using, so the architecture
# can be checked by eye. Querying the current device raises when CUDA is not
# available, so guard the lookup.
if torch.cuda.is_available():
    device_id_t = torch.cuda.current_device()
    device_name_t = torch.cuda.get_device_name(device_id_t)
    print(device_name_t)
else:
    # Keep both names defined on CPU-only machines.
    device_id_t = None
    device_name_t = None
    print('No CUDA device available')

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

#### Set HyperParameters

In [None]:
# Training configuration read by the cells below.
num_classes = 10       # width of the classifier's output layer
num_epochs = 5         # passes over the training loader per model
batch_size = 256       # samples per DataLoader batch
learning_rate = 1e-3   # learning rate handed to the Adam optimizer
# NOTE(review): `ite` is not read by the visible training cell, which sweeps
# the depth values 0..2 itself -- confirm whether anything else uses it.
ite = 1

#### Download dataset from PyTorch torchvision

In [None]:
# MNIST train and test splits, both stored under MNISTdata/ and both converted
# to tensors on access. Only the training split asks for a download.
train_dataset = torchvision.datasets.MNIST(
    root='MNISTdata/',
    download=True,
    train=True,
    transform=transforms.ToTensor(),
)

test_dataset = torchvision.datasets.MNIST(
    root='MNISTdata/',
    train=False,
    transform=transforms.ToTensor(),
)

#### Dataloading with Batch Size
- Use `htop` to see how many CPU cores you have and how busy they are, then choose `num_workers` accordingly.
- Use `nvidia-smi -lms <interval_ms>` (for example `nvidia-smi -lms 500`) to monitor GPU utilization during training.
- Add `pin_memory=True` for faster data loading during training.

In [None]:
# Batched loaders over the two MNIST splits. Batch size, worker count and
# pinned memory are shared; only the training loader shuffles.
_loader_options = dict(batch_size=batch_size, num_workers=16, pin_memory=True)

train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, shuffle=True, **_loader_options)

test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, shuffle=False, **_loader_options)

#### Calculate Layer Output Size

In [None]:
def size_of_neural_net(in_channels, kernel_size, stride, padding):
    """Return the output width of a conv/pool layer along one spatial axis.

    Computes ``floor((in_channels + 2 * padding - kernel_size) / stride) + 1``
    using integer floor division, so the result stays exact instead of going
    through float division and truncation.

    Args:
        in_channels: Input width. Despite the name, the callers in this file
            pass a spatial size (image or feature-map width), not a channel
            count.
        kernel_size: Width of the convolution or pooling window.
        stride: Step between consecutive window positions.
        padding: Padding added to each side of the input.

    Returns:
        The output width as an ``int``.
    """
    padded_span = in_channels - kernel_size + 2 * padding
    return int(padded_span // stride) + 1

In [None]:
# Width of one input image, read from the last dimension of the first sample.
# Going through dataset indexing avoids the `train_data` attribute, which newer
# torchvision releases deprecate in favour of `data`. The dataset is built with
# a ToTensor transform, so each sample's image is a tensor.
image_size = train_dataset[0][0].shape[-1]

# Output channels of the first, second and third convolution stages.
channel_layer = [16, 32, 64]

# Geometry shared by every convolution layer.
KERNEL_SIZE_Conv = 5
STRIDE_SIZE_Conv = 1
PADDING_SIZE_Conv = 2

# Geometry shared by every max-pooling layer.
KERNEL_SIZE_pool = 2
STRIDE_SIZE_pool = 2
PADDING_SIZE_pool = 0

#### Define the Network Class and Check How Many Layers Are Essential

In [None]:
class ConvNet(nn.Module):
    """CNN with one to three conv/ReLU/max-pool stages and a two-layer head.

    Args:
        num_classes: Number of outputs of the final linear layer.
        ite: Depth selector. The first stage is always built; ``ite > 0`` adds
            a second stage and ``ite > 1`` adds a third.

    The stage widths come from the module-level ``channel_layer`` list, and the
    input size of the first linear layer is derived from ``image_size`` with
    ``size_of_neural_net``.
    """

    def __init__(self, num_classes=10, ite=0):
        super(ConvNet, self).__init__()
        # One mandatory stage plus one for each threshold that `ite` exceeds.
        stage_count = 1 + (ite > 0) + (ite > 1)

        self.conv = nn.Sequential()
        prev_channels = 1      # the first convolution takes 1 input channel
        width = image_size     # spatial width tracked through the stages
        for stage in range(1, stage_count + 1):
            out_channels = channel_layer[stage - 1]
            self.conv.add_module(
                "conv_{}".format(stage),
                nn.Conv2d(prev_channels, out_channels,
                          kernel_size=KERNEL_SIZE_Conv,
                          stride=STRIDE_SIZE_Conv,
                          padding=PADDING_SIZE_Conv))
            width = size_of_neural_net(
                width, KERNEL_SIZE_Conv, STRIDE_SIZE_Conv, PADDING_SIZE_Conv)
            self.conv.add_module("relu_{}".format(stage), nn.ReLU())
            self.conv.add_module(
                "maxpool_{}".format(stage),
                nn.MaxPool2d(kernel_size=KERNEL_SIZE_pool,
                             stride=STRIDE_SIZE_pool))
            width = size_of_neural_net(
                width, KERNEL_SIZE_pool, STRIDE_SIZE_pool, PADDING_SIZE_pool)
            prev_channels = out_channels

        # Flattened feature count after the last stage: width * width * channels.
        lin_input = width * width * prev_channels

        # fc1 feeds fc2 directly; there is no activation between them.
        self.fc = nn.Sequential()
        self.fc.add_module("fc1", nn.Linear(lin_input, 512))
        self.fc.add_module("fc2", nn.Linear(512, num_classes))

    def forward(self, x):
        """Run the conv stages, flatten per sample, and apply the linear head."""
        features = self.conv(x)
        flat = features.reshape(features.size(0), -1)
        return self.fc(flat)

In [None]:
import time
total_step = len(train_loader)

# Train and evaluate one network per depth setting passed to ConvNet (0, 1, 2).
# The sweep variable and the batch counter have distinct names so the inner
# loop no longer overwrites the outer loop's variable.
for depth in range(3):
    model = ConvNet(num_classes, depth).to(device)
    print(model)
    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # NOTE(review): Apex's documentation states that amp.initialize should be
    # called only once; here it runs once per model in the sweep. Confirm this
    # is acceptable for the installed Apex version.
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    for epoch in range(num_epochs):
        for step, (images, labels) in enumerate(train_loader, start=1):
            # The loaders use pinned memory, so the copies can be asynchronous.
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize; the backward pass runs on Amp's scaled loss.
            optimizer.zero_grad()
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step()

            if step % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch+1, num_epochs, step, total_step, loss.item()))

    # Evaluate on the test loader with the model in eval mode and autograd off.
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            outputs = model(images)
            # Inside no_grad() the outputs carry no graph, so `.data` is not needed.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))