# 8.0 Complete Training and Validation Pipeline

In [2]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
from matplotlib import pyplot as plt
from matplotlib import rcParams
import numpy as np
from pathlib import Path
import os
import gc
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import joblib 
import warnings
import random
import time
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

from sklearn.metrics import recall_score

In [4]:
rcParams['figure.figsize'] = (8, 4)
plt.style.use('fivethirtyeight')
pd.set_option('max_columns', 100)
pd.set_option("display.precision", 4)
warnings.simplefilter('ignore')

In [5]:
import pretrainedmodels
# https://github.com/Cadene/pretrained-models.pytorch

import torch
from torch import nn
from torch import optim
import torch.nn as nn  
from torch.nn import functional as F 
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import Dataset, DataLoader 

In [6]:
import torch
torch.cuda.is_available()

True

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [8]:
device

device(type='cuda', index=0)

In [9]:
# https://albumentations.ai/docs/getting_started/image_augmentation/
# https://hoya012.github.io/blog/albumentation_tutorial/
import albumentations as A
from albumentations.pytorch import ToTensor

from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, RandomContrast, RandomBrightness, Flip, OneOf, Compose, RandomGamma, 
    ElasticTransform, ChannelShuffle,RGBShift, Rotate, Cutout
)

In [10]:
!ls ../input/

'ls' is not recognized as an internal or external command,
operable program or batch file.


In [11]:
# YOUR PATH
# Linux (WSL)
#path = f'/mnt/c/Users/bokhy/Desktop/Python/github/kaggle/bengaliai-cv19/input/'

# Windows
# YOUR PATH
path = 'C:\\Users\\bokhy\\Desktop\\Python\\github\\kaggle\\bengaliai-cv19\\input\\'

In [12]:
# Set Seed
SEED = 623

# Linux (WSL)
#def seed_everything(seed):
#    random.seed(seed)
#    os.environ['PYTHONHASHSEED'] = str(seed)
#    np.random.seed(seed)
#    torch.manual_seed(seed)
#    torch.cuda.manual_seed(seed)
#    torch.backends.cudnn.deterministic = True
#    torch.backends.cudnn.benchmark = True

# Windows
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(SEED)

### 8.1 Read Dataset

#### For details, please refer to '01_data_loading_and_sampling.ipynb' for stratified sampling

In [13]:
df_train = pd.read_csv(os.path.join(path, 'train.csv'))
df_train['fold'] = pd.read_csv(os.path.join(path, 'df_folds.csv'))['fold']

In [14]:
df_train.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme,fold
0,Train_0,15,9,5,ক্ট্রো,1
1,Train_1,159,0,0,হ,4
2,Train_2,22,3,5,খ্রী,1
3,Train_3,53,2,2,র্টি,2
4,Train_4,71,9,5,থ্রো,0


### 8.2 Split for Training/Validation dataset

#### For details, please refer to '03_Pytoch_dataset_creation_training_validation.ipynb'

In [15]:
index = 0
HEIGHT = 137
WIDTH = 236

trn_fold = [i for i in range(6) if i not in [5]]
vld_fold = [5]

trn_idx = df_train.loc[df_train['fold'].isin(trn_fold)].index
vld_idx = df_train.loc[df_train['fold'].isin(vld_fold)].index

label_1 = df_train.iloc[index].grapheme_root
label_2 = df_train.iloc[index].vowel_diacritic
label_3 = df_train.iloc[index].consonant_diacritic

### 8.3 Define Pytorch Dataset Class

#### For details, please refer to '05_image_augmentation_1_albumentation.ipynb'

In [16]:
class BengaliDataset(Dataset):
    
    def __init__(self, csv, img_height, img_width, transform):
        # To make sure having ordered index with the 'fold'
        self.csv = csv.reset_index()
        # call '.values' to get numpy values
        self.img_ids = csv['image_id'].values
        self.img_height = img_height
        self.img_width = img_width
        self.transform = transform

    # The lenght of the whole data --> In our case, the lengh of the train CSV file    
    def __len__(self):
        return len(self.csv)

    def __getitem__(self, index):
        img_id = self.img_ids[index]
        img = joblib.load(os.path.join(path, f'train_images/{img_id}.pkl'))
        img = img.reshape(self.img_height, self.img_width).astype(np.uint8)
        img = 255 - img
        # new axis to make it 3-dimension
        img = img[:, : , np.newaxis]    
        # This means we repeat the image 3 times in 3 dimension (0,1,2,3..)
        img = np.repeat(img, 3, 2)
        
        if self.transform is not None:
            img = self.transform(image = img)['image']
        
        label_1 = self.csv.iloc[index].grapheme_root
        lable_2 = self.csv.iloc[index].vowel_diacritic
        lable_3 = self.csv.iloc[index].consonant_diacritic
        
        # Unlike last time, we do not have to manually convert the image result to Pytorch
        # because in Albumentation, we defined it to be Pytorch Tensor. So just return itself
        
        # And also, we output it to numpy array for easy training
        return img, np.array([label_1, label_2, label_3])

### 8.4 Image Augmentation

#### For details, please refer to '05_image_augmentation_1_albumentation.ipynb'

In [17]:
train_augmentation = Compose([
        Rotate(30),
        Flip(),
        OneOf([
            IAAAdditiveGaussianNoise(),
            GaussNoise(),
        ], p=0.2),
        OneOf([
            MotionBlur(p=0.2),
            MedianBlur(blur_limit=3, p=0.1),
            Blur(blur_limit=3, p=0.1),
        ], p=0.2),
        ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=.2),
        OneOf([
            OpticalDistortion(p=0.3),
            GridDistortion(p=.1),
            IAAPiecewiseAffine(p=0.3),
        ], p=0.2),
        OneOf([
            CLAHE(clip_limit=2),
            IAASharpen(),
            IAAEmboss(),
            RandomContrast(),
            RandomBrightness(),
        ], p=0.3),
        HueSaturationValue(p=0.3),
        ToTensor()
    ])

# No Augmentation to Validset
valid_augmentation = Compose([
        ToTensor()
    ])

### 8.5 Define Pytorch DataLoader

#### 8.5.1 Create Training/Validation dataset

In [18]:
trn_dataset = BengaliDataset(csv = df_train.loc[trn_idx][:1000],
                            img_height = HEIGHT,
                            img_width = WIDTH,
                            transform = train_augmentation) 

vld_dataset = BengaliDataset(csv = df_train.loc[vld_idx][:1000],
                            img_height = HEIGHT,
                            img_width = WIDTH,
                            transform = valid_augmentation) 

In [19]:
# Import to check that all tensor for each image should fall between 0 and 1, not between 0 and 255 for nomal image pixel
trn_dataset[0][0]

tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0078, 0.0157, 0.0000,  ..., 0.0000, 0.0000, 0.0078],
         [0.0000, 0.0000, 0.0000,  ..., 0.0078, 0.0000, 0.0000],
         ...,
         [0.0941, 0.0510, 0.0118,  ..., 0.0078, 0.0000, 0.0118],
         [0.0549, 0.0039, 0.0000,  ..., 0.0275, 0.0275, 0.0000],
         [0.0118, 0.0353, 0.0000,  ..., 0.0039, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0078, 0.0157, 0.0000,  ..., 0.0000, 0.0000, 0.0078],
         [0.0000, 0.0000, 0.0000,  ..., 0.0078, 0.0000, 0.0000],
         ...,
         [0.0941, 0.0510, 0.0118,  ..., 0.0078, 0.0000, 0.0118],
         [0.0549, 0.0039, 0.0000,  ..., 0.0275, 0.0275, 0.0000],
         [0.0118, 0.0353, 0.0000,  ..., 0.0039, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0078, 0.0157, 0.0000,  ..., 0.0000, 0.0000, 0.0078],
         [0.0000, 0.0000, 0.0000,  ..., 0.0078, 0.0000, 0.

In [20]:
trn_dataset[0][0].min()

tensor(0.0745)

In [21]:
trn_dataset[0][0].max()

tensor(0.9412)

In [22]:
trn_dataset[0][0].shape

torch.Size([3, 137, 236])

In [23]:
vld_dataset[0][0].shape

torch.Size([3, 137, 236])

#### 8.5.2 Create Training/Validation DataLoader

#### DataLoader helps to train in batches

In [24]:
trn_loader = DataLoader(trn_dataset,
                       shuffle = True,  # Always True for "train loader"
                       num_workers = 0, 
#                        persistent_workers =True,
                       batch_size = 64 # or 8 or 256
                       )

vld_loader = DataLoader(vld_dataset,
                       shuffle = False,
                       num_workers = 0,  # Always False for "Valid loader"
#                        persistent_workers =True,
                       batch_size = 64 # or 8 or 256
                       )

In [25]:
# Check if the DataLoader is working properly:
for inputs, targets in trn_loader:  
    print('Image batch dimensions:', inputs.shape)
    print('targets dimensions:', targets.shape)
    break

Image batch dimensions: torch.Size([64, 3, 137, 236])
targets dimensions: torch.Size([64, 3])


### 8.6 Load the Pre-Trained Model, create optimizer, loss, and shceduler

#### For details, please refer to '04_transfer_learning_CNN.ipynb'

In [26]:
model_name = 'resnet34'
model = pretrainedmodels.__dict__[model_name](pretrained='imagenet')

In [27]:
# change the last layer to fit our task
in_features = model.last_linear.in_features
# total output numbers are 186 
model.last_linear = torch.nn.Linear(in_features, 186)

In [28]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [29]:
# Use GPU computing
model = model.cuda()

In [30]:
# Create Optimizer (make it learn parameters of our model)
optimizer = torch.optim.AdamW(model.parameters(), lr = 0.001)

In [31]:
loss_fn = nn.CrossEntropyLoss()

# Helps training 
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                      mode = 'max',
                                                      verbose = True,
                                                      patience = 7,
                                                      factor = 0.5)

### 8.7 Train the model

In [32]:
from tqdm import tqdm

In [33]:
best_score = -1

In [33]:
start_time = time.time()

for e in range(10):
    
    train_loss = []

    # this means that 'we start the training!'
    model.train()

    # tqdm_notebook wrap would show the whole epochs
    for inputs, targets in tqdm(trn_loader):

        # IF using GPU
        #inputs = inputs.to(device)
        #targets = targets.to(device)

        logits = model(inputs)

        # For each batch, they are in order
        grapheme = logits[:, :168]
        vowel = logits[:, 168:179]
        cons = logits[:, 179:]

        # targets[:,0] --> first column in targets is the grapheme
        # targets[:,1] --> second column is the vowel
        # targets[:,2] --> third  column is the consonants

        # Competition says that it gives (2,1,1) weight to each targets, so we compute the loss with the same weight
        loss = 0.5 * loss_fn(grapheme, targets[:,0]) + 0.25 * loss_fn(vowel, targets[:,1]) + 0.25 * loss_fn(cons, targets[:,2]) 

        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        train_loss.append(loss.item())

    ### 8.7 Model Validation

    val_loss = []
    val_true = []
    val_pred = []

    model.eval()

    # NO gradient descent should be applied to validation
    with torch.no_grad():
        for inputs, targets in tqdm(vld_loader):

            # IF using GPU
            #inputs = inputs.to(device)
            #targets = targets.to(device)

            logits = model(inputs)

            grapheme = logits[:, :168]
            vowel = logits[:, 168:179]
            cons = logits[:, 179:]

            # targets[:,0] --> first column in targets is the grapheme
            # targets[:,1] --> second column is the vowel
            # targets[:,2] --> third  column is the consonants
            loss = loss_fn(grapheme, targets[:,0]) + loss_fn(vowel, targets[:,1]) + loss_fn(cons, targets[:,2]) 

            val_loss.append(loss.item())

            grapheme = grapheme.cpu().argmax(dim=1).data.numpy()
            vowel    = vowel.cpu().argmax(dim=1).data.numpy()
            cons     = cons.cpu().argmax(dim=1).data.numpy()

            # Consolidate result for all 3 
            val_true.append(targets.cpu().numpy())

            # Stack all gives the same result for each 
            val_pred.append(np.stack([grapheme,vowel,cons], axis = 1))


    # val_true has the shape of the final result
    val_true = np.concatenate(val_true)
    val_pred = np.concatenate(val_pred)

    val_loss = np.mean(val_loss)
    train_loss = np.mean(train_loss)

    score_g = recall_score(val_true[: , 0], val_pred[:, 0], average = 'macro')
    score_v = recall_score(val_true[: , 1], val_pred[:, 1], average = 'macro')
    score_c = recall_score(val_true[: , 2], val_pred[:, 2], average = 'macro')

    final_score = np.average([score_g , score_v , score_c], weights = [2,1,1])

    print(f'train_loss: {train_loss: .5f}; val_loss: {val_loss: .5f}; score: {final_score: .5f}')
    print(f'score_g: {score_g: .5f}; score_v: {score_v: .5f}; score_c: {score_c: .5f}')

    print('Time elapsed: %.2f min' % ((time.time() - start_time)/60)) 

    if final_score > best_score:
        best_score = final_score

        state_dict = model.cpu().state_dict()
        model = model.cpu()
        torch.save(state_dict, 'my_model.pt')

print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))           

  0%|                                                                                           | 0/16 [00:00<?, ?it/s]


RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same

In [None]:
# To see model's weight
model.load_state_dict(torch.load('./model.pt'))

## 8.8 Test the Model

In [None]:
test_acc = 0.0
for i, (images, labels) in enumerate(testloader, 0):
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    test_acc += get_accuracy(outputs, labels, BATCH_SIZE)
        
print('Avg. Test Accuracy: %.2f'%( test_acc/i))