In [94]:
import pandas as pd
import numpy as np
import os
from PIL import Image as PImage
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from typing import List
%matplotlib inline

In [5]:
# Directory holding the train_image_data_{i}.parquet shards read by the dataset-preparation cell.
DATA_FOLDER = './train'

In [9]:
# Read the label table and store the three class-id columns compactly as uint8.
label_id_dtypes = {
    column: 'uint8'
    for column in ('grapheme_root', 'vowel_diacritic', 'consonant_diacritic')
}
train_df_ = pd.read_csv('./train.csv').astype(label_id_dtypes)
train_df_.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


Prepare dataset 

In [10]:
# Load every image shard, attach the labels by image_id, and stack the shards.
# Previously `train_df` was reassigned on each iteration, so only the last
# shard (train_image_data_3.parquet) was kept and the other three were lost.
label_columns = ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic', 'grapheme']
pixel_parts, label_parts = [], []
for i in range(4):
    shard = pd.merge(pd.read_parquet(os.path.join(DATA_FOLDER, 'train_image_data_{}.parquet'.format(i))),
                     train_df_, on='image_id').drop(['image_id'], axis=1)
    # Split labels from pixels per shard so the wide merged frame is never
    # held for all shards at once.
    label_parts.append(shard[label_columns])
    pixel_parts.append(shard.drop(columns=label_columns))

# ignore_index=True gives both frames the same 0..n-1 index, so positional and
# label-based row selection agree in later cells.
train_df = pd.concat(pixel_parts, ignore_index=True)
# pd.concat returns a new frame, so train_labels owns its data and later
# column assignments do not write into a slice of another frame.
train_labels = pd.concat(label_parts, ignore_index=True)
del pixel_parts, label_parts, shard

Split into folds

In [35]:
# Assign each row to one of 10 folds, stratified on the
# (vowel_diacritic, consonant_diacritic) pair.

# Take an explicit copy so the column assignments below never target a slice
# of another DataFrame.
train_labels = train_labels.copy()

skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
train_labels['vowel_consonant_diacritic_pair'] = (
    train_labels['vowel_diacritic'].astype(str)
    + '_'
    + train_labels['consonant_diacritic'].astype(str)
)
train_labels['test_fold'] = 0

# skf.split yields *positional* row indices, so write through .iloc with the
# column position. The former chained form
# `train_labels['test_fold'][test_index] = idx` may assign into a temporary
# (it is a no-op under pandas copy-on-write) and treated positions as labels.
fold_column = train_labels.columns.get_loc('test_fold')
# StratifiedKFold ignores `groups`, so it is not passed.
for idx, (train_index, test_index) in enumerate(
        skf.split(X=train_labels['vowel_consonant_diacritic_pair'],
                  y=train_labels['vowel_consonant_diacritic_pair'])):
    train_labels.iloc[test_index, fold_column] = idx

Prepare Callbacks and Dataset for training

In [82]:
class ImageDataset(Dataset):
    """Dataset serving single-channel 137x236 images with their three labels.

    Args:
        df: DataFrame in which every row is one flattened image
            (137 * 236 values); rows are accessed by position.
        labels: DataFrame with 'grapheme_root', 'vowel_diacritic' and
            'consonant_diacritic' columns, row-aligned by position with ``df``.
        transforms: optional callable invoked as ``transforms(image=image)``
            that returns a mapping containing the key 'image'.
    """

    def __init__(self, df, labels, transforms=None):
        self.df, self.labels, self.transforms = df, labels, transforms

    def __len__(self):
        """Number of samples, i.e. the number of rows in ``df``."""
        return len(self.df)

    def _label_tensor(self, column, idx):
        """Return the label in ``column`` at position ``idx`` as a 0-d tensor."""
        return torch.tensor(self.labels[column].values[idx])

    def __getitem__(self, idx):
        """Build the sample at position ``idx``.

        Returns:
            dict with 0-d label tensors under 'grapheme_root',
            'vowel_diacritic' and 'consonant_diacritic', and the image as a
            channel-first (1, 137, 236) tensor under 'image'.
        """
        pixels = self.df.iloc[idx].values.astype(np.uint8)
        # Add a trailing channel axis: (137, 236) -> (137, 236, 1).
        image = pixels.reshape(137, 236)[:, :, np.newaxis]

        sample = {
            name: self._label_tensor(name, idx)
            for name in ('grapheme_root', 'vowel_diacritic', 'consonant_diacritic')
        }

        if self.transforms is not None:
            image = self.transforms(image=image)['image']

        # HWC -> CHW, the layout the returned tensor uses.
        sample['image'] = torch.from_numpy(image.transpose((2, 0, 1)))
        return sample

Make train and validation datasets

In [83]:
# Loader settings shared by the train and validation DataLoaders below.
batch_size = 16   # samples per batch
num_workers = 1   # passed to DataLoader(num_workers=...)

In [84]:
# Fold held out for validation; every other fold is used for training.
idx = 0
val_fold_idx = train_labels['test_fold'].eq(idx)
train_fold_idx = train_labels['test_fold'].ne(idx)

In [85]:
# Deterministic preprocessing only (normalisation, no augmentation).
# NOTE(review): the training dataset also uses this validation pipeline, so no
# augmentation is applied during training - confirm this is intended.
# `(0.485)` / `(0.229)` were plain floats in parentheses, not tuples; they are
# written as floats here to make that explicit (same values passed).
aug_val = A.Compose(
    [A.Normalize(mean=0.485, std=0.229)],
    p=1.0,
)

train_dataset = ImageDataset(df=train_df.loc[train_fold_idx, :],
                             labels=train_labels.loc[train_fold_idx, :],
                             transforms=aug_val)
# Fix: validation labels must be selected with the *validation* mask.
# Previously train_fold_idx was used here, pairing validation images with
# labels from unrelated training rows.
val_dataset = ImageDataset(df=train_df.loc[val_fold_idx, :],
                           labels=train_labels.loc[val_fold_idx, :],
                           transforms=aug_val)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
    shuffle=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
    shuffle=False,
)

Callbacks for catalyst

In [None]:
from sklearn.metrics import recall_score

class TaskMetricCallback(Callback):
    """Loader-level macro-averaged recall for one classification head.

    Predictions (argmax over axis 1 of the model output) and targets are
    one-hot encoded and accumulated batch by batch. At the end of each loader,
    ``sklearn.metrics.recall_score(..., average="macro")`` is computed over
    everything accumulated and stored in
    ``state.metrics.epoch_values[state.loader_name][prefix]``.

    Proposed overall task metric, for reference: compute macro recall
    separately for 'grapheme_root', 'consonant_diacritic' and
    'vowel_diacritic', then combine them with
    ``np.average(scores, weights=[2, 1, 1])``. This callback produces one of
    those per-component scores.

    Args:
        input_key: key in ``state.input`` holding the integer class targets.
        output_key: key in ``state.output`` holding the per-class scores,
            with classes along axis 1.
        prefix: name under which the metric is stored.
            NOTE(review): the default is "macro_f1" although the value is
            macro recall; it is kept unchanged so existing references to the
            metric name keep working.
        ignore_index: if not None, samples whose target equals this value are
            excluded from the metric.

    NOTE(review): ``Callback``, ``CallbackOrder``, ``RunnerState`` and
    ``to_numpy`` are not imported in the visible part of the notebook;
    presumably they come from catalyst - confirm and import them before
    running this cell.
    """

    def __init__(
        self,
        input_key: str = "targets",
        output_key: str = "logits",
        prefix: str = "macro_f1",
        ignore_index=None
    ):
        super().__init__(CallbackOrder.Metric)
        # Called as metric_fn(outputs, targets); recall_score expects
        # (y_true, y_pred), hence the swapped argument order inside.
        self.metric_fn = lambda outputs, targets: recall_score(targets, outputs, average="macro")
        self.prefix = prefix
        self.output_key = output_key
        self.input_key = input_key
        self.outputs = []
        self.targets = []
        self.ignore_index = ignore_index

    def on_batch_end(self, state: RunnerState):
        """Accumulate one-hot predictions and targets for the current batch."""
        outputs = to_numpy(state.output[self.output_key])
        targets = to_numpy(state.input[self.input_key])

        num_classes = outputs.shape[1]
        outputs = np.argmax(outputs, axis=1)

        if self.ignore_index is not None:
            mask = targets != self.ignore_index
            outputs = outputs[mask]
            targets = targets[mask]

        # Build the identity matrix once per batch and index it with the whole
        # index array, instead of allocating a fresh num_classes x num_classes
        # matrix for every single sample.
        identity = np.eye(num_classes)
        self.outputs.extend(identity[outputs])
        self.targets.extend(identity[targets])

    def on_loader_start(self, state):
        """Reset the accumulators so each loader is scored independently."""
        self.outputs = []
        self.targets = []

    def on_loader_end(self, state):
        """Compute the metric over the whole loader and record it."""
        metric_name = self.prefix
        targets = np.array(self.targets)
        outputs = np.array(self.outputs)

        metric = self.metric_fn(outputs, targets)
        state.metrics.epoch_values[state.loader_name][metric_name] = metric