In [1]:
import pandas as pd
import numpy as np
import os
from PIL import Image as PImage
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from typing import List
%matplotlib inline

In [2]:
DATA_FOLDER = './train'

In [3]:
train_df_ = pd.read_csv('./train.csv')
train_df_[['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']] = train_df_[['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].astype('uint8')
train_df_.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


Prepare dataset 

In [4]:
for i in range(4):
    train_df = pd.merge(pd.read_parquet(os.path.join(DATA_FOLDER, 'train_image_data_{}.parquet'.format(i))),
                        train_df_, on='image_id').drop(['image_id'], axis=1)
#to image
train_labels = train_df[['grapheme_root','vowel_diacritic','consonant_diacritic','grapheme']]
train_df.drop(['grapheme_root','vowel_diacritic','consonant_diacritic','grapheme'], axis=1, inplace=True)

Split into folds

In [5]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
train_labels['vowel_consonant_diacritic_pair'] = train_labels['vowel_diacritic'].astype(str) + '_' + train_labels['consonant_diacritic'].astype(str)
train_labels['test_fold'] = 0 
for idx, (train_index, test_index) in enumerate(skf.split(X = train_labels['vowel_consonant_diacritic_pair'], 
                                                          y = train_labels['vowel_consonant_diacritic_pair'], 
                                                          groups=train_labels['vowel_consonant_diacritic_pair'])):
    train_labels['test_fold'][test_index] = idx

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html

Prepare Callbacks and Dataset for training

In [6]:
class ImageDataset(Dataset):
    def __init__(self, 
                 df, 
                 labels,
                 transforms=None):
        
        self.df = df
        self.labels = labels
        self.transforms = transforms
        
    def __getitem__(self, idx):
        flattened_image = self.df.iloc[idx].values.astype(np.uint8)
        image = np.expand_dims(flattened_image.reshape(137, 236), 2)
        
        grapheme_root =  self.labels['grapheme_root'].values[idx]
        vowel_diacritic = self.labels['vowel_diacritic'].values[idx]
        consonant_diacritic = self.labels['consonant_diacritic'].values[idx]
        
        if self.transforms is not None:
            augmented = self.transforms(image=image)
            image = augmented['image']
        
        image = torch.from_numpy(image.transpose((2,0,1)))
        grapheme_root = torch.tensor(grapheme_root).long()
        vowel_diacritic = torch.tensor(vowel_diacritic).long()
        consonant_diacritic = torch.tensor(consonant_diacritic).long() 
        
        output_dict  = {
            'grapheme_root' : grapheme_root, 
            'vowel_diacritic' : vowel_diacritic, 
            'consonant_diacritic' : consonant_diacritic, 
            'image' : image
                       }

        return output_dict

    def __len__(self):
        return len(self.df)

Make train and validation datasets

In [7]:
batch_size = 16
num_workers = 1

In [8]:
idx = 0
train_fold_idx = train_labels['test_fold'] != idx
val_fold_idx = train_labels['test_fold'] == idx

In [9]:
aug_val = A.Compose([
    A.Normalize(mean=(0.485), std=(0.229))
],
        p=1.0)  
train_dataset = ImageDataset(df = train_df.loc[train_fold_idx, :], 
                             labels = train_labels.loc[train_fold_idx, :], 
                             transforms = aug_val
                            )
val_dataset = ImageDataset(df = train_df.loc[val_fold_idx, :],
                           labels = train_labels.loc[val_fold_idx, :], 
                           transforms = aug_val
                          )
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
    shuffle=False   
    )

Callbacks for catalyst

In [10]:
from sklearn.metrics import recall_score
from catalyst.dl import Callback, RunnerState, MetricCallback, CallbackOrder, CriterionCallback

class TaskMetricCallback(Callback):
    '''
    Proposed metrics:
    import numpy as np
    import sklearn.metrics

    scores = []
    for component in ['grapheme_root', 'consonant_diacritic', 'vowel_diacritic']:
        y_true_subset = solution[solution[component] == component]['target'].values
        y_pred_subset = submission[submission[component] == component]['target'].values
        scores.append(sklearn.metrics.recall_score(
            y_true_subset, y_pred_subset, average='macro'))
    final_score = np.average(scores, weights=[2,1,1])
    '''

    def __init__(
        self, 
        input_key: str = ['grapheme_root', 'consonant_diacritic', 'vowel_diacritic'], 
        output_key: str = ['grapheme_root', 'consonant_diacritic', 'vowel_diacritic'],
        class_names: str = ['grapheme_root', 'consonant_diacritic', 'vowel_diacritic'],
        prefix: str = "taskmetric", 
        ignore_index=None
    ):
        super().__init__(CallbackOrder.Metric)
        self.metric_fn = lambda outputs, targets: recall_score(targets, outputs, average="macro")
        self.prefix = prefix
        self.output_key = output_key
        self.input_key = input_key
        self.class_names = class_names
        self.outputs = [[] for i in range(3)]
        self.targets = [[] for i in range(3)]

    def on_batch_end(self, state: RunnerState):
        
        for i in range(3):
            outputs = state.output[self.output_key[i]].detach().cpu().numpy()
            targets = state.input[self.input_key[i]].detach().cpu().numpy()
            #num_classes = outputs.shape[1]
            outputs = np.argmax(outputs, axis=1)
            #outputs = [np.eye(num_classes)[y] for y in outputs]
            #targets = [np.eye(num_classes)[y] for y in targets]
            self.outputs[i].extend(outputs)
            self.targets[i].extend(targets)

    def on_loader_start(self, state):
        self.outputs = [[] for i in range(3)]
        self.targets = [[] for i in range(3)]

    def on_loader_end(self, state):
        metric_name = self.prefix
        score_vec = []
        for i in range(3):
            targets = np.array(self.targets[i])
            outputs = np.array(self.outputs[i])
            metric = self.metric_fn(outputs, targets)
            score_vec.append(metric)
            state.metrics.epoch_values[state.loader_name][self.class_names[i]] = float(metric)
            
            
        state.metrics.epoch_values[state.loader_name][metric_name] = np.average(score_vec, weights=[2,1,1])

alchemy not available, to install alchemy, run `pip install alchemy-catalyst`.


In [11]:
import torch.nn as nn

class ResidualBlock(nn.Module):
    def __init__(self,in_channels,out_channels,stride=1,kernel_size=3,padding=1,bias=False):
        super(ResidualBlock,self).__init__()
        self.cnn1 =nn.Sequential(
            nn.Conv2d(in_channels,out_channels,kernel_size,stride,padding,bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(True)
        )
        self.cnn2 = nn.Sequential(
            nn.Conv2d(out_channels,out_channels,kernel_size,1,padding,bias=False),
            nn.BatchNorm2d(out_channels)
        )
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels,out_channels,kernel_size=1,stride=stride,bias=False),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.shortcut = nn.Sequential()
    def forward(self,x):
        residual = x
        x = self.cnn1(x)
        x = self.cnn2(x)
        x += self.shortcut(residual)
        x = nn.ReLU(True)(x)
        return x
class ResNet18(nn.Module):    
    def __init__(self):
        super(ResNet18,self).__init__()
        
        self.block1 = nn.Sequential(
            nn.Conv2d(1,64,kernel_size=2,stride=2,padding=3,bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )
        
        self.block2 = nn.Sequential(
            nn.MaxPool2d(1,1),
            ResidualBlock(64,64),
            ResidualBlock(64,64,2)
        )
        
        self.block3 = nn.Sequential(
            ResidualBlock(64,128),
            ResidualBlock(128,128,2)
        )
        self.block4 = nn.Sequential(
            ResidualBlock(128,256),
            ResidualBlock(256,256,2)
        )
        self.block5 = nn.Sequential(
            ResidualBlock(256,512),
            ResidualBlock(512,512,2)
        )
        
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # vowel_diacritic
        self.fc1 = nn.Linear(512,11)
        # grapheme_root
        self.fc2 = nn.Linear(512,168)
        # consonant_diacritic
        self.fc3 = nn.Linear(512,7)
        
    def forward(self,x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x1 = self.fc1(x)
        x2 = self.fc2(x)
        x3 = self.fc3(x)
        return({'vowel_diacritic':x1,
                'grapheme_root':x2,
                'consonant_diacritic':x3})
class ResNet34(nn.Module):    
    def __init__(self):
        super(ResNet34,self).__init__()
        
        self.block1 = nn.Sequential(
            nn.Conv2d(1,64,kernel_size=2,stride=2,padding=3,bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )
        
        self.block2 = nn.Sequential(
            nn.MaxPool2d(1,1),
            ResidualBlock(64,64),
            ResidualBlock(64,64,2)
        )
        
        self.block3 = nn.Sequential(
            ResidualBlock(64,128),
            ResidualBlock(128,128,2)
        )
        
        self.block4 = nn.Sequential(
            ResidualBlock(128,256),
            ResidualBlock(256,256,2)
        )
        self.block5 = nn.Sequential(
            ResidualBlock(256,512),
            ResidualBlock(512,512,2)
        )
        
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # vowel_diacritic
        self.fc1 = nn.Linear(512,11)
        # grapheme_root
        self.fc2 = nn.Linear(512,168)
        # consonant_diacritic
        self.fc3 = nn.Linear(512,7)
    def forward(self,x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)
        x = self.avgpool(x)
        x = x.view(x.size(0),-1)
        x1 = self.fc1(x)
        x2 = self.fc2(x)
        x3 = self.fc3(x)
        return({'vowel_diacritic':x1,
                'grapheme_root':x2,
                'consonant_diacritic':x3})

In [12]:
model = ResNet18().cuda()

In [13]:
import collections
from catalyst.utils import set_global_seed
from catalyst.dl.runner import SupervisedRunner
from catalyst.dl.callbacks import CriterionCallback, CriterionAggregatorCallback

In [14]:
set_global_seed(42)


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.



In [15]:
loaders = collections.OrderedDict()
loaders["train"] = train_loader
loaders["valid"] = val_loader
runner = SupervisedRunner(input_key='image',
                          input_target_key=None, 
                          output_key=None)

In [16]:
optimizer = torch.optim.AdamW(
    model.parameters(), 
    lr=3e-4, 
    weight_decay=0.001)  
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.1, 
    patience=10) 

In [17]:
criterions_dict = {'vowel_diacritic_loss':torch.nn.CrossEntropyLoss(), 
                   'grapheme_root_loss':torch.nn.CrossEntropyLoss(),
                   'consonant_diacritic_loss':torch.nn.CrossEntropyLoss(),}

In [18]:
callbacks=[
    CriterionCallback(input_key='grapheme_root',
                      output_key='grapheme_root',
                      prefix='grapheme_root_loss',
                      criterion_key='grapheme_root_loss', multiplier=2.0),
    CriterionCallback(input_key='vowel_diacritic',
                      output_key='vowel_diacritic',
                      prefix='vowel_diacritic_loss',
                      criterion_key='vowel_diacritic_loss', 
                      multiplier=1.0),
    CriterionCallback(input_key='consonant_diacritic',
                      output_key='consonant_diacritic',
                      prefix='consonant_diacritic_loss',
                      criterion_key='consonant_diacritic_loss', 
                      multiplier=1.0),
    CriterionAggregatorCallback(prefix='loss',
                                loss_keys=['grapheme_root_loss',
                                           'vowel_diacritic_loss',
                                           'consonant_diacritic_loss']),
    TaskMetricCallback()]

In [None]:
runner.train(
    model=model,
    main_metric='loss',
    minimize_metric=True,
    criterion=criterions_dict,
    optimizer=optimizer,
    callbacks=callbacks,
    loaders=loaders,
    logdir='./test_1',
    scheduler=scheduler,
    num_epochs=20,
    verbose=True)  

<IPython.core.display.Javascript object>

1/20 * Epoch (train): 100% 2823/2823 [05:09<00:00,  9.13it/s, consonant_diacritic_loss=0.045, grapheme_root_loss=1.230, loss=1.454, vowel_diacritic_loss=0.179] 
1/20 * Epoch (valid): 100% 316/316 [00:12<00:00, 24.53it/s, consonant_diacritic_loss=0.149, grapheme_root_loss=28.856, loss=37.634, vowel_diacritic_loss=8.628]
[2020-02-10 23:47:40,008] 
1/20 * Epoch 1 (train): _base/lr=0.0003 | _base/momentum=0.9000 | _timers/_fps=2360.2960 | _timers/batch_time=0.0073 | _timers/data_time=0.0011 | _timers/model_time=0.0061 | consonant_diacritic=0.5543 | consonant_diacritic_loss=0.4806 | grapheme_root=0.2467 | grapheme_root_loss=5.1971 | loss=6.3092 | taskmetric=0.4254 | vowel_diacritic=0.6540 | vowel_diacritic_loss=0.6314
1/20 * Epoch 1 (valid): _base/lr=0.0003 | _base/momentum=0.9000 | _timers/_fps=3130.8237 | _timers/batch_time=0.0060 | _timers/data_time=0.0014 | _timers/model_time=0.0045 | consonant_diacritic=0.1394 | consonant_diacritic_loss=3.0568 | grapheme_root=0.0058 | grapheme_root_los