# Tutorial 10

### Today's Topic:
* Batch Normalization
* Residual Neural Network
* Pytorch utilizing GPU speedup 
* Torchani aev computer



## Batch Normalization
Batch normalization (also known as batch norm) is a method used to make artificial neural networks faster and more stable through normalization of the layers' inputs by re-centering and re-scalin
Documentation: https://pytorch.org/docs/stable/generated/torch.nn.BatchNorm2d.html <br>
expected input of size (N, C, H, W) <br>
the Batch Normalization is done over the C dimension, computing statistics on (N, H, W) slices

In [1]:
import torch
from torch import nn
inp = torch.randn(20, 100, 35, 45)
bn = nn.BatchNorm2d(100)
output = bn(inp)
output.shape

  from .autonotebook import tqdm as notebook_tqdm


torch.Size([20, 100, 35, 45])

In [5]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv = nn.ModuleList([nn.Conv2d(1,6,kernel_size=3,padding=1), #before pooling (B,6,32,32)
                                  nn.Conv2d(6,24,kernel_size=3,padding=1), # (B,24,16,16)
                                  nn.Conv2d(24,12,kernel_size=5)]) # (B,12,4,4)
        self.pooling = nn.MaxPool2d(kernel_size=2)
        self.fc = nn.ModuleList([nn.Linear(192,192),nn.Linear(192,10)])
        self.activation = nn.ReLU()
        self.bn = [nn.BatchNorm2d(6), nn.BatchNorm2d(24), nn.BatchNorm2d(12)]
        
    def forward(self, x):
        for i in range(2):
            x = self.pooling(self.activation(self.bn[i](self.conv[i](x))))
        x = nn.Flatten()(self.activation(self.bn[2](self.conv[2](x)))) #(N, C, W, H) #(N, C*W*H)
        x = self.activation(self.fc[0](x))
        x = nn.Softmax(dim=-1)(self.fc[1](x))
        return x
    

In [6]:
cnn = CNN()
print(cnn)

CNN(
  (conv): ModuleList(
    (0): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(6, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): Conv2d(24, 12, kernel_size=(5, 5), stride=(1, 1))
  )
  (pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): ModuleList(
    (0): Linear(in_features=192, out_features=192, bias=True)
    (1): Linear(in_features=192, out_features=10, bias=True)
  )
  (activation): ReLU()
)


In [4]:
cnn(torch.randn(20, 1, 32, 32)).shape

torch.Size([20, 10])

## Residual Neural Network

### Additive vs concatenative skip connections

![](Additive-skip-connections-vs-concatenative-skip-connections-Rectangles-represent-data.png)


In [18]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv = nn.ModuleList([nn.Conv2d(1,6,kernel_size=3,padding=1), #before pooling (B,6,32,32)
                                  nn.Conv2d(6,24,kernel_size=3,padding=1), # (B,24,16,16)
                                  nn.Conv2d(24,12,kernel_size=5)]) # (B,12,4,4)
        self.pooling = nn.MaxPool2d(kernel_size=2)
        self.fc = nn.ModuleList([nn.Linear(192,192),nn.Linear(192,10)])
        self.activation = nn.ReLU()
        self.bn = [nn.BatchNorm2d(6), nn.BatchNorm2d(24), nn.BatchNorm2d(12)]
        
    def forward(self, inp):
        residual = inp
        x = self.bn[0](self.conv[0](inp))
        x = x + residual 
        x = self.pooling(self.activation(x))
        x = self.pooling(self.activation(self.bn[1](self.conv[1](x))))
        x = nn.Flatten()(self.activation(self.bn[2](self.conv[2](x))))
        residual = x
        y = self.fc[0](x)
        y += residual # to do the skip connection 
        y = self.activation(x)
        y = nn.Softmax(dim=-1)(self.fc[1](y))
        
        return x
    

In [19]:
net = CNN()
print(net)
net(torch.randn(20, 1, 32, 32))

CNN(
  (conv): ModuleList(
    (0): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(6, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): Conv2d(24, 12, kernel_size=(5, 5), stride=(1, 1))
  )
  (pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): ModuleList(
    (0): Linear(in_features=192, out_features=192, bias=True)
    (1): Linear(in_features=192, out_features=10, bias=True)
  )
  (activation): ReLU()
)


tensor([[0.3983, 0.6847, 0.3903,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 1.2575, 1.8897, 0.0000],
        [0.0000, 0.2984, 1.1979,  ..., 0.6522, 0.0000, 1.2142],
        ...,
        [0.0310, 2.0298, 0.1642,  ..., 0.0000, 0.0000, 0.0000],
        [0.4990, 1.7113, 0.9213,  ..., 0.0000, 0.5603, 0.0000],
        [0.0000, 0.1663, 0.0000,  ..., 0.5010, 0.0000, 0.0000]],
       grad_fn=<ReshapeAliasBackward0>)

## MGCF cluster resources
https://docs.google.com/document/d/1lIkJ6g772Ss5e-4CJ_xGjlVRfOVUq6gYnyGiEhtBc-Q/edit?usp=sharing

## using GPU resources

### checking available resources

In [20]:
torch.cuda.is_available()

False

To get the number of GPUs available.

In [21]:
torch.cuda.device_count()

0

In [22]:
torch.cuda.get_advice_name(0)

AttributeError: module 'torch.cuda' has no attribute 'get_advice_name'

### Move tensors to gpu

By default, the tensors are generated on the CPU. Even the model is initialized on the CPU. Thus one has to manually ensure that the operations are done using GPU. 


In [23]:
X_train = torch.FloatTensor([0., 1., 2.])
x_train.is_cuda


NameError: name 'x_train' is not defined

In [None]:
X_train.get_device()

It's a common PyTorch practice to initialize a variable, usually named device that will hold the device we’re training on (CPU or GPU). 

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

In [None]:
X_train = X_train.to(device)
X_train.is_cuda

In [None]:
X_train.get_device()

The same logic applies to the model. 


In [None]:
model = CNN()
model.to(device)

### Move tensors back to CPU

In [None]:
X_train = X_train.cpu()
X_train.get_device()

In [None]:
X = torch.FloatTensor([0., 1., 2.])
Y = torch.FloatTensor([0., 1., 2.])
X = X.cuda()

In [None]:
X+Y

In [None]:
Y = Y.to(device)

In [None]:
(X+Y).cpu().numpy()

### AEV Computer

In [37]:
import torchani 
import torch
import numpy as np
from pyanitools import anidataloader
# data = anidataloader("../../ANI1_dataset/ANI-1_release/ani_gdb_s07.h5")
data = anidataloader("../ANI-1_release/ani_gdb_s05.h5")
data_iter = data.__iter__()
mols = next(data_iter)
# Extract the data
P = mols['path']
X = mols['coordinates']
E = mols['energies']
S = mols['species']
sm = mols['smiles']

print(S, X.shape)

['C', 'C', 'C', 'C', 'N', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'H'] (10080, 16, 3)


In [35]:
Rcr = 5.2
EtaR = torch.tensor([16], dtype=torch.float)
ShfR = torch.tensor([0.900000,1.168750,1.437500,1.706250,1.975000,2.243750,2.51250,2.781250,3.050000,3.318750,3.587500,3.856250,4.125000,4.39375,4.662500,4.931250])
Rca = 3.5
EtaA = torch.tensor([8], dtype=torch.float)
ShfA = torch.tensor([0.900000,1.550000,2.200000,2.850000], dtype=torch.float)
ShfZ = torch.tensor([0.19634954,0.58904862,0.9817477,1.3744468,1.7671459,2.1598449,2.552544,2.945243]) 
Zeta = torch.tensor([32], dtype=torch.float)
species_order = ['H', 'C', 'N', 'O']
num_species = len(species_order)

aev_computer = torchani.AEVComputer(Rcr, Rca, EtaR, ShfR, EtaA, Zeta, ShfA, ShfZ, num_species)

In [39]:
mapping = {"H": 0, "C": 1, "N": 2, "O": 3}
species = np.array([mapping[atom] for atom in S])
species = np.tile(species, (X.shape[0], 1))
species = torch.tensor(species)
X = torch.tensor(X)


In [40]:
aev_computer((species, X)) #species(N, A) [0, 1, 2, 3] # coordinate: (N, A, 3) each atom has 3 dimensional coordinate
# output: (N, A, #num_AEV)


SpeciesAEV(species=tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), aevs=tensor([[[3.4694e-01, 6.2468e-01, 1.1157e-01,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [4.3388e-10, 2.3543e-06, 1.3987e-03,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [3.4627e-01, 6.2463e-01, 1.1217e-01,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         ...,
         [1.2235e-06, 7.9564e-04, 5.1409e-02,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [5.0809e-05, 7.6973e-03, 1.1560e-01,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [5.0809e-05, 7.6973e-03, 1.1560e-01,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00]],

        [[3.3922e-01, 6.2703e-01, 1.1660e-01,  ..., 0.0000e+00,
          0.0000e+00, 0.0000e+00],
         [9.0067e-10, 4.3598e-06, 2.1402e-03,  .