# Tutorial 8
## Outline
* Tensorboard for monitoring training progress
* Convolutional neural networks
    * Parameters of convolution: units, kernel size, stride, padding & dilation
    * Calculation of output shape from input shape and convolution parameters
    * Pooling operations
    * building a CNN using PyTorch
* Q&A on HW#7

## Tensorboard for monitoring training progress

You may need to install the following packages: <br>
**conda install -c conda-forge tensorboard** <br>
**pip install torch-summary**

Let's retry the HW5 problem:

In [4]:
import pandas as pd
import numpy as np
# Load the wine dataset; the path is relative to the notebook's working directory.
data=pd.read_csv("./../HW-02/wines.csv")
# Show the first rows as a quick sanity check of the columns.
data.head()

Unnamed: 0,Alcohol %,Malic Acid,Ash,Alkalinity,Mg,Phenols,Flavanoids,Phenols.1,Proantho-cyanins,Color intensity,Hue,OD280 315,Proline,Start assignment,ranking
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065,1,1
1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735,1,1
2,14.83,1.64,2.17,14.0,97,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045,1,1
3,14.12,1.48,2.32,16.8,95,2.2,2.43,0.26,1.57,5.0,1.17,2.82,1280,1,1
4,13.75,1.73,2.41,16.0,89,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320,1,1


In [5]:
from sklearn.preprocessing import StandardScaler

# Features are every column except the two label-like columns;
# the classification target is the "ranking" column.
feature_table = data.drop(columns=["Start assignment", "ranking"])
x = feature_table.to_numpy()
y = data["ranking"].to_numpy()

# Standardize each feature column with scikit-learn's StandardScaler.
scaler = StandardScaler()
x_norm = scaler.fit_transform(x)
print(x_norm.shape)

(178, 13)


In [6]:
from pylab import *
from tqdm import tqdm
from sklearn.model_selection import train_test_split

def _snapshot_weights(model):
    """Return an independent copy of the model's current state dict.

    ``model.state_dict()`` hands back references to the live parameter
    tensors, which the optimizer updates in place. Cloning each tensor is
    what makes the snapshot survive subsequent training steps.
    """
    return {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}


def train_and_val(model,train_X,train_y,epochs,draw_curve=False,tensorboard_logger=None):
    """
    Train a classifier with Adam and keep the weights that gave the lowest
    validation loss.

    The data is split 80/20 into training and validation parts. After every
    optimizer step the validation loss is evaluated; whenever it improves,
    a copy of the weights is stored. When training ends, the best weights
    are loaded back into ``model`` (in place). Nothing is returned.

    Parameters
    --------------
    model: a PyTorch model whose output is fed to nn.CrossEntropyLoss
    train_X: np.array shape(ndata,nfeatures)
    train_y: np.array shape(ndata), class labels starting at 1
    epochs: int, number of full-batch optimization steps
    draw_curve: bool, plot the validation loss against the epoch number
    tensorboard_logger: optional object with an ``add_scalar(tag, value, step)``
        method (e.g. a SummaryWriter); when given, training/validation loss
        and accuracy are logged once per epoch
    """
    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
    train_X = torch.tensor(train_X, dtype=torch.float)
    train_y = torch.tensor(train_y, dtype=torch.long)
    val_array = []

    # Split training examples further into training and validation
    train_X, val_X, train_y, val_y = train_test_split(train_X, train_y, test_size=0.2)
    best_weights = _snapshot_weights(model)
    lowest_val_loss = np.inf

    for i in tqdm(range(epochs)):
        pred = model(train_X)
        # in order to work with cross entropy loss, we shift the classes from [1,2,3] to [0,1,2]
        loss = loss_func(pred, train_y - 1)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # validation (evaluated after the step, so it matches the current weights)
        with torch.no_grad():
            val_loss = loss_func(model(val_X), val_y - 1).item()
        val_array.append(val_loss)

        if val_loss < lowest_val_loss:
            lowest_val_loss = val_loss
            # Must be a copy: a bare state_dict() would keep tracking later updates.
            best_weights = _snapshot_weights(model)

        # Accuracies are only needed for logging, so skip the extra forward
        # passes when no logger is attached.
        if tensorboard_logger is not None:
            acc = calculate_accuracy_NN(model, train_X, train_y)
            val_acc = calculate_accuracy_NN(model, val_X, val_y)
            tensorboard_logger.add_scalar("losses", loss.item(), i + 1)
            tensorboard_logger.add_scalar("accuracies", acc, i + 1)
            tensorboard_logger.add_scalar("val_losses", val_loss, i + 1)
            tensorboard_logger.add_scalar("val_accuracies", val_acc, i + 1)

    # The final number of epochs is when the minimum error in validation set occurs
    if val_array:
        final_epochs = np.argmin(val_array) + 1
        print("Number of epochs with lowest validation:", final_epochs)
    # Restore the weights recorded at the lowest validation loss
    model.load_state_dict(best_weights)

    if draw_curve:
        plt.figure()
        plt.plot(np.arange(len(val_array)) + 1, val_array, label='Validation loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()

def calculate_accuracy_NN(model,xs,ys):
    """
    Fraction of samples whose predicted class matches the label.

    Parameters
    --------------
    model: callable returning per-class scores of shape (ndata, nclasses)
    xs: torch tensor, or array-like that is converted to a float tensor
    ys: labels (torch tensor or array) using 1-based class numbers
    """
    inputs = xs if torch.is_tensor(xs) else torch.tensor(xs, dtype=torch.float)
    with torch.no_grad():
        class_index = model(inputs).argmax(dim=1)
    # argmax yields 0-based class indices, while the labels start at 1.
    predicted_labels = class_index.detach().numpy() + 1
    labels = ys.detach().numpy() if torch.is_tensor(ys) else ys
    n_correct = np.sum(labels == predicted_labels)
    return n_correct / len(labels)

In [7]:
import torch
from torch import nn
class MLPNet(nn.Module):
    """
    Fully connected network 13 -> 20 -> 10 -> 3 with tanh hidden activations.

    ``forward`` returns softmax-normalized class probabilities.
    NOTE(review): nn.CrossEntropyLoss expects unnormalized logits and applies
    log-softmax itself, so training this output with it normalizes twice.
    """

    def __init__(self):
        super().__init__()
        layer_sizes = [13, 20, 10, 3]
        self.fc = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])]
        )
        self.activation = nn.Tanh()

    def forward(self, x):
        *hidden_layers, output_layer = self.fc
        # tanh after each of the two hidden layers
        for layer in hidden_layers:
            x = self.activation(layer(x))
        # probabilities over the last dimension
        return torch.softmax(output_layer(x), dim=-1)

We can use torch-summary to visualize the structure and the number of parameters in a model

In [8]:
from torchsummary import summary
# Instantiate the MLP and print a per-layer summary (output shapes and
# parameter counts) for an input of size (1, 13).
model = MLPNet()
summary(model, (1,13))

Layer (type:depth-idx)                   Output Shape              Param #
├─ModuleList: 1                          []                        --
|    └─Linear: 2-1                       [-1, 1, 20]               280
├─Tanh: 1-1                              [-1, 1, 20]               --
├─ModuleList: 1                          []                        --
|    └─Linear: 2-2                       [-1, 1, 10]               210
├─Tanh: 1-2                              [-1, 1, 10]               --
├─ModuleList: 1                          []                        --
|    └─Linear: 2-3                       [-1, 1, 3]                33
Total params: 523
Trainable params: 523
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00


Layer (type:depth-idx)                   Output Shape              Param #
├─ModuleList: 1                          []                        --
|    └─Linear: 2-1                       [-1, 1, 20]               280
├─Tanh: 1-1                              [-1, 1, 20]               --
├─ModuleList: 1                          []                        --
|    └─Linear: 2-2                       [-1, 1, 10]               210
├─Tanh: 1-2                              [-1, 1, 10]               --
├─ModuleList: 1                          []                        --
|    └─Linear: 2-3                       [-1, 1, 3]                33
Total params: 523
Trainable params: 523
Non-trainable params: 0
Total mult-adds (M): 0.00
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00

Tensorboard: https://pytorch.org/docs/stable/tensorboard.html

In [10]:
from torch.utils.tensorboard import SummaryWriter
# Create a TensorBoard writer that stores its event files under the "MLP" directory.
mlp_logger = SummaryWriter(log_dir="MLP")
# Flush any pending events to disk.
mlp_logger.flush()

In [11]:
# Train the MLP for 1000 epochs on the standardized features, logging
# per-epoch losses and accuracies to the TensorBoard writer.
train_and_val(model, x_norm, y, 1000, tensorboard_logger=mlp_logger)

100%|██████████| 1000/1000 [00:01<00:00, 581.97it/s]

Number of epochs with lowest validation: 1000





To view training curves on Tensorboard, go to command line and run: <br>
**tensorboard --logdir=path-to-notebook** <br>
Then open the url in your browser

In [13]:
# tensorboard --logdir=MLP  

# CNN




## CNN general architecture
![](https://cdn-images-1.medium.com/max/800/1*lvvWF48t7cyRWqct13eU0w.jpeg)


## Convolution Filters help extract features
![](https://qph.fs.quoracdn.net/main-qimg-50915e66f98186a786b3d0344eea9aba-pjlq)
## Calculating convolution output shape
Here is a [visualization](https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md) of padding, stride and dilation. The output height (and, analogously, the width) is rounded down:
$$
H_{\text {out }}=\left\lfloor\frac{H_{\text {in }}+2 \times \text { padding }-\operatorname{dilation} \times(\text { kernel_size }-1)-1}{\text { stride }}+1\right\rfloor
$$


### LeNet architecture
LeCun, Y.; Bottou, L.; Bengio, Y. & Haffner, P. (1998). Gradient-based learning applied to document recognition. Proceedings of the IEEE, 86(11): 2278-2324. ([Link](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf))

|Layer No.|Layer type|#units|Kernel size|Stride|Activation|
|---|---|---|---|---|---|
|1|2D Convolution|6|5|1|tanh|
|2|Average pooling|6|2|2|\\|
|3|2D Convolution|16|5|1|tanh|
|4|Average pooling|16|2|2|\\|
|5|2D Convolution|120|5|1|tanh|
|6|Flatten|\\|\\|\\|\\|
|7|Fully connected|84|\\|\\|tanh|
|8|Fully connected|10|\\|\\|softmax|


#neurons in each layer: 1024->256->84->10

activation: tanh

In [14]:
from torch import nn
import torch
class LeNet(nn.Module):
    """
    LeNet-style CNN: three 5x5 convolutions (the first two followed by 2x2
    average pooling), then two fully connected layers, with tanh activations.

    Shapes for a (B, 1, 32, 32) input:
        conv[0] -> (B, 6, 28, 28),   pooling -> (B, 6, 14, 14)
        conv[1] -> (B, 16, 10, 10),  pooling -> (B, 16, 5, 5)
        conv[2] -> (B, 120, 1, 1),   flatten -> (B, 120)
        fc[0]   -> (B, 84),          fc[1]   -> (B, 10)

    ``forward`` returns softmax-normalized class probabilities.
    NOTE(review): nn.CrossEntropyLoss expects unnormalized logits and applies
    log-softmax itself, so training this output with it normalizes twice.
    """

    def __init__(self):
        super().__init__()
        channels = [1, 6, 16, 120]
        self.conv = nn.ModuleList([
            nn.Conv2d(c_in, c_out, kernel_size=5, stride=1)
            for c_in, c_out in zip(channels[:-1], channels[1:])
        ])

        # 2x2 average pooling, shared by the first two convolution stages
        self.pooling = nn.AvgPool2d(kernel_size=2)

        self.fc = nn.ModuleList([nn.Linear(120, 84), nn.Linear(84, 10)])

        self.activation = nn.Tanh()

    def forward(self, x):
        *pooled_convs, last_conv = self.conv
        for conv in pooled_convs:
            x = self.pooling(self.activation(conv(x)))
        # the last convolution is not followed by pooling
        x = self.activation(last_conv(x))
        # collapse every dimension after the first into one feature axis
        x = torch.flatten(x, start_dim=1)
        hidden, output = self.fc
        x = self.activation(hidden(x))
        return torch.softmax(output(x), dim=-1)
    


In [17]:
# Build LeNet and print its per-layer summary for an input of size (1, 32, 32).
model_conv = LeNet()
summary(model_conv, (1,32,32))

Layer (type:depth-idx)                   Output Shape              Param #
├─ModuleList: 1                          []                        --
|    └─Conv2d: 2-1                       [-1, 6, 28, 28]           156
├─Tanh: 1-1                              [-1, 6, 28, 28]           --
├─AvgPool2d: 1-2                         [-1, 6, 14, 14]           --
├─ModuleList: 1                          []                        --
|    └─Conv2d: 2-2                       [-1, 16, 10, 10]          2,416
├─Tanh: 1-3                              [-1, 16, 10, 10]          --
├─AvgPool2d: 1-4                         [-1, 16, 5, 5]            --
├─ModuleList: 1                          []                        --
|    └─Conv2d: 2-3                       [-1, 120, 1, 1]           48,120
├─Tanh: 1-5                              [-1, 120, 1, 1]           --
├─ModuleList: 1                          []                        --
|    └─Linear: 2-4                       [-1, 84]                  10,164
├─T

Layer (type:depth-idx)                   Output Shape              Param #
├─ModuleList: 1                          []                        --
|    └─Conv2d: 2-1                       [-1, 6, 28, 28]           156
├─Tanh: 1-1                              [-1, 6, 28, 28]           --
├─AvgPool2d: 1-2                         [-1, 6, 14, 14]           --
├─ModuleList: 1                          []                        --
|    └─Conv2d: 2-2                       [-1, 16, 10, 10]          2,416
├─Tanh: 1-3                              [-1, 16, 10, 10]          --
├─AvgPool2d: 1-4                         [-1, 16, 5, 5]            --
├─ModuleList: 1                          []                        --
|    └─Conv2d: 2-3                       [-1, 120, 1, 1]           48,120
├─Tanh: 1-5                              [-1, 120, 1, 1]           --
├─ModuleList: 1                          []                        --
|    └─Linear: 2-4                       [-1, 84]                  10,164
├─T