In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

import warnings
warnings.filterwarnings('ignore')

from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import recall_score
import matplotlib.pyplot as plt
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import joblib
from tqdm import tqdm
import os
import random

%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Cutout, Rotate, Normalize
)
from albumentations.pytorch import ToTensor, ToTensorV2

In [3]:
SEED = 623
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(SEED)

# 1. Read Dataset

In [4]:
path = 'C:\\Users\\bokhy\\Desktop\\kaggle\\bengaliai-cv19\\'
df_train = pd.read_csv(os.path.join(path, 'train.csv'))

In [5]:
df_train.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


# 2. Split the dataset (Train/Valid)

In [6]:
df_train['id'] = df_train['image_id'].apply(lambda x:int(x.split('_')[1]))

In [7]:
X = df_train[['id','grapheme_root','vowel_diacritic','consonant_diacritic']].values[:,0]
y = df_train[['id','grapheme_root','vowel_diacritic','consonant_diacritic']].values[:,1:]

In [8]:
mskf = MultilabelStratifiedKFold(n_splits = 6, random_state= 623)
df_train['fold'] = -1

In [9]:
for i,(train_idx, vld_idx) in enumerate(mskf.split(X,y)):
    df_train.loc[vld_idx, 'fold'] = i

In [10]:
# look at the eqaul sample keeping distribution of all 3 cahraters
df_train['fold'].value_counts()

5    33474
0    33474
4    33473
3    33473
2    33473
1    33473
Name: fold, dtype: int64

In [11]:
df_train

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme,id,fold
0,Train_0,15,9,5,ক্ট্রো,0,1
1,Train_1,159,0,0,হ,1,1
2,Train_2,22,3,5,খ্রী,2,0
3,Train_3,53,2,2,র্টি,3,4
4,Train_4,71,9,5,থ্রো,4,2
...,...,...,...,...,...,...,...
200835,Train_200835,22,7,2,র্খে,200835,2
200836,Train_200836,65,9,0,ত্তো,200836,5
200837,Train_200837,2,1,4,অ্যা,200837,0
200838,Train_200838,152,9,0,স্নো,200838,1


In [12]:
#df_train['fold'] = pd.read_csv(os.path.join(path, 'df_folds.csv'))['fold']

In [13]:
trn_fold = [i for i in range(6) if i not in [5]]
vld_fold = [5]
trn_idx = df_train.loc[df_train['fold'].isin(trn_fold)].index
vld_idx = df_train.loc[df_train['fold'].isin(vld_fold)].index

# 3. Define the data

In [35]:
train_parquet = ['train_image_data_0.parquet',
        'train_image_data_1.parquet',
        'train_image_data_2.parquet',
        'train_image_data_3.parquet']

In [36]:
# Careful (take long)
for fname in train_parquet:
    F = os.path.join(path, fname)
    df_train = pd.read_parquet(F)
    img_ids = df_train['image_id'].values
    img_array = df_train.iloc[:,1:].values
    for idx in tqdm(range(len(df_train))):
        img_id = img_ids[idx]
        img = img_array[idx]
        joblib.dump(img, f'C:\\Users\\bokhy\\Desktop\\kaggle\\bengaliai-cv19\\train_images\\{img_id}.pkl')

100%|███████████████████████████████████████████████████████████████████████████| 50210/50210 [02:14<00:00, 373.10it/s]
100%|███████████████████████████████████████████████████████████████████████████| 50210/50210 [02:07<00:00, 394.28it/s]
100%|███████████████████████████████████████████████████████████████████████████| 50210/50210 [02:09<00:00, 386.54it/s]
100%|███████████████████████████████████████████████████████████████████████████| 50210/50210 [02:07<00:00, 392.41it/s]


In [14]:
class BengaliDataset(Dataset):
    def __init__(self, csv, img_height, img_width, transform):
        self.csv = csv.reset_index()
        self.img_ids = csv['image_id'].values
        self.img_height = img_height
        self.img_width = img_width
        self.transform = transform
        
    def __len__(self):
        return len(self.csv)
    
    def __getitem__(self, index):
        img_id = self.img_ids[index]
        img = joblib.load(f'C:\\Users\\bokhy\\Desktop\\kaggle\\bengaliai-cv19\\train_images\\{img_id}.pkl' )
        img = img.reshape(self.img_height, self.img_width).astype(np.uint8)
        img = 255 - img
        
        img = img[:, :, np.newaxis]
        img = np.repeat(img, 3, 2)
        
        if self.transform is not None:
            img = self.transform(image = img)['image']
            
        label_1 = self.csv.iloc[index].grapheme_root
        label_2 = self.csv.iloc[index].vowel_diacritic
        label_3 = self.csv.iloc[index].consonant_diacritic
        
        return img, np.array([label_1, label_2, label_3])

# 4. Define Image Augmenation (Albumentation)

In [15]:
train_augmentation = Compose([
    Rotate(20),
    ToTensor()
])

# We dont do augmentation on Validation set
valid_augmentation = Compose([
    ToTensor()
])

# 5. Load Dataset on DataLoader

In [16]:
# Train and Valid Data sets are created per below code
trn_dataset = BengaliDataset(csv = df_train.loc[trn_idx][:1000],
                            img_height = 137,
                            img_width = 236,
                            transform = train_augmentation)

vld_dataset = BengaliDataset(csv = df_train.loc[vld_idx][:1000],
                            img_height = 137,
                            img_width = 236,
                            transform = valid_augmentation)

In [17]:
# Create batch to use it for learning
trn_loader = DataLoader(trn_dataset,
                       shuffle = True,
                       num_workers = 0, 
                       batch_size = 128 # or 256
                       )

vld_loader = DataLoader(vld_dataset,
                       shuffle = False,
                       num_workers = 0, 
                       batch_size = 128 # or 256
                       )

# 6. Create a model, optimizer and criterion

In [18]:
import pretrainedmodels 

In [19]:
model_name = 'resnet50'
model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')

In [20]:
# change the last layer to fit our task
in_features = model.last_linear.in_features
# total output numbers are 186 
model.last_linear = torch.nn.Linear(in_features, 186)

In [21]:
# Use GPU computing
model = model.cuda()

In [22]:
# Create Optimizer (make it learn parameters of our model)
optimizer = torch.optim.AdamW(model.parameters(), lr = 0.001)

In [23]:
loss_fn = nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                      mode = 'max',
                                                      verbose = True,
                                                      patience = 7,
                                                      factor = 0.5)

# 7. Train the Model

In [24]:
from tqdm import tqdm_notebook

In [25]:
best_score = - 1

In [26]:
#for e in range(50):
train_loss = []
# this means that 'we start the training!'
model.train()

# tqdm_notebook wrap would show the whole epochs
for inputs, targets in tqdm_notebook(trn_loader):

    inputs = inputs.cuda()
    targets = targets.cuda()

    logits = model(inputs)

    grapheme = logits[:, :168]
    vowel = logits[:, 168:179]
    cons = logits[:, 179:]

    # targets[:,0] --> first column in targets is the grapheme
    # targets[:,1] --> second column is the vowel
    # targets[:,2] --> third  column is the consonants
    loss = loss_fn(grapheme, targets[:,0]) + loss_fn(vowel, targets[:,1]) + loss_fn(cons, targets[:,2]) 

    loss.backward()

    optimizer.step()
    optimizer.zero_grad()
    # loss.item would make is a value from tensor
    train_loss.append(loss.item())

val_loss = []
val_true = []
val_pred = []

model.eval()

with torch.no_grad():
    for inputs, targets in tqdm_notebook(vld_loader):

        inputs = inputs.cuda()
        targets = targets.cuda()

        logits = model(inputs)

        grapheme = logits[:, :168]
        vowel = logits[:, 168:179]
        cons = logits[:, 179:]

        # targets[:,0] --> first column in targets is the grapheme
        # targets[:,1] --> second column is the vowel
        # targets[:,2] --> third  column is the consonants
        loss = loss_fn(grapheme, targets[:,0]) + loss_fn(vowel, targets[:,1]) + loss_fn(cons, targets[:,2]) 

        val_loss.append(loss.item())

        grapheme = grapheme.cpu().argmax(dim=1).data.numpy()
        vowel    = vowel.cpu().argmax(dim=1).data.numpy()
        cons     = cons.cpu().argmax(dim=1).data.numpy()

        val_true.append(targets.cpu().numpy())
        val_pred.append(np.stack([grapheme,vowel,cons], axis = 1))

val_true = np.concatenate(val_true)
val_pred = np.concatenate(val_pred)

val_loss = np.mean(val_loss)
train_loss = np.mean(train_loss)

score_g = recall_score(val_true[: , 0], val_pred[:, 0], average = 'macro')
score_v = recall_score(val_true[: , 1], val_pred[:, 1], average = 'macro')
score_c = recall_score(val_true[: , 2], val_pred[:, 2], average = 'macro')

final_score = np.average([score_g , score_v , score_c], weights = [2,1,1])

print(f'train_loss: {train_loss: .5f}; val_loss: {val_loss: .5f}; score: {final_score: .5f}')
print(f'score_g: {score_g: .5f}; score_v: {score_v: .5f}; score_c: {score_c: .5f}')

if final_score > best_score:
    best_score = final_score

    stat_dict = model.cpu().stat_dict()
    model = model.cpu()
    torch.save(stat_dict, 'model.pt')

HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))




RuntimeError: CUDA out of memory. Tried to allocate 260.00 MiB (GPU 0; 4.00 GiB total capacity; 2.84 GiB already allocated; 46.96 MiB free; 54.21 MiB cached)

In [None]:
# To see model's weight
model.load_stat_dict(torch.load('./model.pt'))