In [None]:
# !pip install torchcontrib
# !pip install iterative-stratification
# !pip install fastparquet
# !pip install -U albumentations

In [None]:
!python -V

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn

import zipfile
import warnings
warnings.filterwarnings('ignore')

from torchvision.transforms import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import recall_score
import matplotlib.pyplot as plt
from torchcontrib.optim import SWA
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import joblib
from albumentations.pytorch import ToTensorV2
import albumentations as A
import cv2

from tqdm.auto import tqdm

In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [3]:
# config

index = 0
HEIGHT = 137
WIDTH = 236

data_dir = './bengaliai-cv19'


Data Load

In [None]:
df_train = pd.read_csv(f'./{data_dir}/train.csv')
files_train = [f'train_image_data_{fid}.parquet' for fid in range(4)]

In [None]:
%%time
F = os.path.join(data_dir, files_train[0])

train0 = pd.read_parquet(F)

In [None]:
idx = 0
img = train0.iloc[idx, 1:].values.astype(np.uint8)

img.reshape(137, 236).shape

In [None]:
idx = np.random.randint(len(train0))
img = train0.iloc[idx, 1:].values.astype(np.uint8)
img = 255 - img

plt.imshow(img.reshape(137, 236), cmap= 'gray')

In [None]:
img_ids = train0['image_id'].values
img_array = train0.iloc[:, 1:].values

In [None]:
# !mkdir train_image

img_id  = img_ids[idx]
img = img_array[idx]

joblib.dump(img, f'{data_dir}/train_image/{img_id}.pkl')

In [None]:
# data set memory를 효율적으로 쓰기위해 pandas가 아닌 array로 저장
img_ids = df_train['image_id'].values
tmp = 0
for fname in (files_train):
  F = os.path.join(data_dir, fname)
  df_train = pd.read_parquet(F)
  img_array = df_train.iloc[:,1:].values
  for idx in tqdm(range(len(df_train))):
    img_id = img_ids[tmp]
    img = img_array[idx]
    joblib.dump(img, f'{data_dir}/train_image/{img_id}.pkl')
    tmp += 1

In [None]:
#저장한 dump파일을 하나씩 불러와서 학습하도록 만들어줌
img_id = df_train['image_id'][0]
img = joblib.load(f'{data_dir}/train_image/{img_id}.pkl').astype(np.uint8) #이미지는 0~255 int8로 해주는게 컴퓨팅 계산에 효율적

img = img.reshape(HEIGHT, WIDTH)
img[:,:, np.newaxis].shape # 채널을 하나 만들어줌 

In [None]:
joblib.load(f'{data_dir}/train_image/{img_id}.pkl')

# Splits dataset

In [None]:
df_train = pd.read_csv('./bengaliai-cv19/train.csv')

In [None]:
df_train.head()

In [None]:
plt.figure(figsize = (10,20))
df_train['consonant_diacritic'].value_counts().sort_index().plot.barh()  #data bias가 상당히 높음

In [None]:
# 레이블 지정

label_1 = df_train.iloc[index].grapheme_root
label_2 = df_train.iloc[index].vowel_diacritic
label_3 = df_train.iloc[index].consonant_diacritic

In [None]:
df_train

In [None]:
df_train["id"] = df_train['image_id'].apply(lambda x : int(x.split('_')[1]))

In [None]:
X = df_train[['id', 'grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].values[:, 0]
y = df_train[['id', 'grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].values[:, 1:]

In [None]:
mskf = MultilabelStratifiedKFold(n_splits= 6, shuffle = True,random_state=42)

In [None]:
df_train['fold'] = -1

In [None]:
%%time

for i, (trn_idx, vid_idx) in enumerate(mskf.split(X,y)):
  df_train.loc[vid_idx, 'fold'] = i

In [None]:
df_train['fold'].value_counts()

In [None]:
df_train.to_csv('./bengaliai-cv19/df_folds.csv', index = False)

# Define dataset

In [4]:
class BengaliDataset(Dataset):
  def __init__(self, csv, img_height, img_width, transform):
    self.csv = csv.reset_index()
    self.img_ids = csv['image_id'].values
    self.img_height = img_height
    self.img_width = img_width
    self.transform = transform


  def __len__(self):
    return len(self.csv)


  def __getitem__(self, index):
    img_id = self.img_ids[index]
    img = joblib.load(f'./{data_dir}/train_image/{img_id}.pkl')
    img = img.reshape(self.img_height, self.img_width).astype(np.uint8)
    img = (255 - img)/255
    img = img[:,:, np.newaxis]

    img = np.repeat(img, 3, 2)   #채널을 3으로 변경해줌

    if self.transform is not None:
      img = self.transform(image = img)['image']

    label_1 = self.csv.iloc[index].grapheme_root
    label_2 = self.csv.iloc[index].vowel_diacritic
    label_3 = self.csv.iloc[index].consonant_diacritic

    return img, np.array([label_1, label_2, label_3])

# Define augmentations

In [5]:
train_aug = A.Compose([
    A.Rotate(20),
    A.pytorch.transforms.ToTensorV2()
])

vaild_aug = A.Compose([
    A.pytorch.transforms.ToTensorV2()
])

In [6]:
train_aug

Compose([
  Rotate(always_apply=False, p=0.5, limit=(-20, 20), interpolation=1, border_mode=4, value=None, mask_value=None),
  ToTensorV2(always_apply=True, p=1.0, transpose_mask=False),
], p=1.0, bbox_params=None, keypoint_params=None, additional_targets={})

# Make dataloader

In [7]:
df_train = pd.read_csv('./bengaliai-cv19/df_folds.csv')
df_train.tail()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme,id,fold
200835,Train_200835,22,7,2,র্খে,200835,1
200836,Train_200836,65,9,0,ত্তো,200836,4
200837,Train_200837,2,1,4,অ্যা,200837,5
200838,Train_200838,152,9,0,স্নো,200838,4
200839,Train_200839,127,2,0,ল্টি,200839,4


In [8]:
trn_fold = [i for i in range(6) if i not in [5]]
vld_fold = [5]
trn_idx = df_train.loc[df_train['fold'].isin(trn_fold)].index
vld_idx = df_train.loc[df_train['fold'].isin(vld_fold)].index

In [9]:
trn_dataset = BengaliDataset(csv = df_train.loc[trn_idx],
                             img_height = HEIGHT,
                             img_width = WIDTH,
                             transform = train_aug)

vld_dataset = BengaliDataset(csv = df_train.loc[vld_idx],
                            img_height = HEIGHT,
                            img_width = WIDTH,
                            transform = vaild_aug)


In [10]:
trn_loader = DataLoader(trn_dataset,
                       shuffle = True,
#                        num_workers = 2,
                       batch_size = 32,
                       )

vld_loader = DataLoader(vld_dataset,
                       shuffle = False,
#                        num_workers = 2,
                       batch_size = 32,
                       )

In [17]:
%%time
for inputs, targets in tqdm(trn_loader):
    break
    
# def train():
#   for inputs , targets in tqdm(trn_loader):
#         break
# if __name__ == '__main__':

#     train()

  0%|          | 0/5231 [00:00<?, ?it/s]

AttributeError: 'Tensor' object has no attribute 'shpae'

# Create model, opt, criterion

In [None]:
# !pip install pretrainedmodels

In [21]:
import pretrainedmodels

In [22]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [23]:
model_name = 'resnet34'
model = pretrainedmodels.__dict__[model_name](pretrained = "imagenet")

In [24]:
in_features =  model.last_linear.in_features

model.last_linear = nn.Linear(in_features, 186)

In [26]:
model = model.cuda()

In [28]:
optimizer = torch.optim.AdamW(model.parameters(), lr = 0.001)

In [29]:
loss_fn = nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                      mode='max',
                                                      verbose = True,
                                                      factor = 0.5)

In [30]:
train_loss = []
model.train()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [46]:
for inputs, targets in tqdm(trn_loader):
    inputs = inputs.cuda()
    targets = targets.cuda()
    
    logits = model(inputs.float())
    
    
    grapheme = logits[:, :168]
    vowel = logits[:, 168:179]
    cons = logits[:, 179:]
    
    
    loss = (0.5 * loss_fn(grapheme, targets[:, 0])) + (0.25*loss_fn(vowel, targets[:, 1])) + (0.25*loss_fn(cons, targets[: , 2]))
    loss.backward()
    
    optimizer.step()
    optimizer.zero_grad()
    train_loss.append(loss.item())

#     break

  0%|          | 0/5231 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [80]:
best_score = -1

In [81]:
for epoch in range(10):
    val_loss = []
    val_true = []
    val_pred = []


    model.eval()

    with torch.no_grad():
        for inputs, targets in tqdm(vld_loader):
            inputs = inputs.cuda()
            targets = targets.cuda()

            logits = model(inputs.float())


            grapheme = logits[:, :168]
            vowel = logits[:, 168:179]
            cons = logits[:, 179:]


            loss = (0.5 * loss_fn(grapheme, targets[:, 0])) + (0.25*loss_fn(vowel, targets[:, 1])) + (0.25*loss_fn(cons, targets[: , 2]))


            val_loss.append(loss.item())

            grapheme = torch.nn.functional.softmax(grapheme.cpu(), dim=1).argmax(dim=1).data.numpy()
            vowel = torch.nn.functional.softmax(vowel.cpu(), dim=1).argmax(dim=1).data.numpy()
            cons = torch.nn.functional.softmax(cons.cpu(), dim=1).argmax(dim=1).data.numpy()

            val_true.append(targets.cpu().numpy())
            val_pred.append(np.stack([grapheme, vowel, cons], axis = 1))

    val_true = np.concatenate(val_true)
    val_pred = np.concatenate(val_pred)
    val_loss = np.mean(val_loss) 
    train_loss = np.mean(train_loss)

    score_g = recall_score(val_true[:, 0], val_pred[:, 0], average='macro')
    score_v = recall_score(val_true[:, 1], val_pred[:, 1], average='macro')
    score_c = recall_score(val_true[:, 2], val_pred[:, 2], average='macro')
    final_score = np.average([score_g,score_v, score_c], weights = [2,1,1])

    print(f'train_loss : {train_loss:.5f}; val_loss: {val_loss:.5f}; score: {final_score:.5f}')
    print(f'score_g : {score_g:.5f}; score_v: {score_v:.5f}; score_c: {score_c:.5f}')


    if final_score > best_score:
        best_score = final_score

        state_dict = model.cpu().state_dict()
        model = model.cuda()
        torch.save(state_dict, "test_model.pt")
        print('SAVE MODE UPDATE')
    

  0%|          | 0/1047 [00:00<?, ?it/s]

train_loss : 1.39816; val_loss: 0.64982; score: 0.73888
score_g : 0.66300; score_v: 0.81963; score_c: 0.80989


  0%|          | 0/1047 [00:00<?, ?it/s]

train_loss : 1.39816; val_loss: 0.64982; score: 0.73888
score_g : 0.66300; score_v: 0.81963; score_c: 0.80989


  0%|          | 0/1047 [00:00<?, ?it/s]

train_loss : 1.39816; val_loss: 0.64982; score: 0.73888
score_g : 0.66300; score_v: 0.81963; score_c: 0.80989


  0%|          | 0/1047 [00:00<?, ?it/s]

train_loss : 1.39816; val_loss: 0.64982; score: 0.73888
score_g : 0.66300; score_v: 0.81963; score_c: 0.80989


  0%|          | 0/1047 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [60]:
np.concatenate(val_true).shape

(33474, 3)

In [82]:
model.load_state_dict(torch.load('./test_model.pt'))

<All keys matched successfully>