In [3]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from tqdm import tqdm

In [4]:
!ls /mnt/chicm/data/bengali

bengaliai-cv19.zip	   test_image_data_3.parquet
class_map.csv		   train.csv
sample_submission.csv	   train_image_data_0.parquet
test.csv		   train_image_data_1.parquet
test_image_data_0.parquet  train_image_data_2.parquet
test_image_data_1.parquet  train_image_data_3.parquet
test_image_data_2.parquet


In [9]:
# Root directory of the competition files. The default is the original
# machine-specific location; set the BENGALI_DATA_DIR environment variable to
# run the notebook elsewhere without editing this cell.
DATA_DIR = os.environ.get('BENGALI_DATA_DIR', '/mnt/chicm/data/bengali')

In [10]:
# Load the four CSV tables that ship with the dataset from DATA_DIR.
# test_df and sample_sub_df are inspected with .head() in the following cells.
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
test_df.head()

Unnamed: 0,row_id,image_id,component
0,Test_0_consonant_diacritic,Test_0,consonant_diacritic
1,Test_0_grapheme_root,Test_0,grapheme_root
2,Test_0_vowel_diacritic,Test_0,vowel_diacritic
3,Test_1_consonant_diacritic,Test_1,consonant_diacritic
4,Test_1_grapheme_root,Test_1,grapheme_root


In [7]:
sample_sub_df.head()

Unnamed: 0,row_id,target
0,Test_0_consonant_diacritic,0
1,Test_0_grapheme_root,0
2,Test_0_vowel_diacritic,0
3,Test_1_consonant_diacritic,0
4,Test_1_grapheme_root,0


In [11]:
# Peek at the first test image shard: one row per image, an `image_id` column
# followed by flattened pixel columns named 0..32331 (see the recorded output).
test_img_df = pd.read_parquet(f'{DATA_DIR}/test_image_data_0.parquet')
test_img_df.head()

Unnamed: 0,image_id,0,1,2,3,4,5,6,7,8,...,32322,32323,32324,32325,32326,32327,32328,32329,32330,32331
0,Test_0,247,253,253,252,252,252,252,253,253,...,254,254,254,254,254,254,253,253,252,250
1,Test_1,253,253,253,253,253,253,253,253,253,...,255,255,255,255,255,255,255,255,255,255
2,Test_2,253,253,253,253,253,252,251,252,252,...,255,255,255,255,255,255,255,254,253,252


In [16]:
# Image geometry used to reshape a flattened pixel row.
# 137 * 236 = 32332, matching the pixel columns 0..32331 in the parquet shards.
HEIGHT = 137
WIDTH = 236

In [17]:
#img = 255 - df.iloc[10, 1:].values.reshape(HEIGHT, WIDTH).astype(np.uint8)
#plt.imshow(img)

In [18]:
#img2 = cv2.resize(img, (256, 128))
#plt.imshow(img2)

In [11]:
import albumentations as albu

def get_train_augs(p=1.):
    """Build the albumentations pipeline applied to training images.

    Args:
        p: probability with which the composed pipeline as a whole is applied.

    Returns:
        An ``albu.Compose`` of shift/scale/rotate, blur, optical distortion
        and Gaussian noise, each with its own application probability.
    """
    # HorizontalFlip, GridDistortion and HueSaturationValue were tried and left
    # out (HueSaturationValue does not apply to grey-scale images).
    steps = [
        albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=0.5),
        albu.Blur(blur_limit=3, p=0.3),
        albu.OpticalDistortion(p=0.3),
        albu.GaussNoise(p=0.3),
    ]
    return albu.Compose(steps, p=p)

In [12]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

HEIGHT = 137
WIDTH = 236

class BengaliDataset(Dataset):
    """Dataset over flattened grey-scale images stored one row per image.

    Args:
        df: label frame with ``image_id``, ``grapheme_root``,
            ``vowel_diacritic`` and ``consonant_diacritic`` columns; may be
            ``None`` in test mode, where only ``img_df`` is read.
        img_df: pixel frame whose rows hold ``HEIGHT * WIDTH`` values. In
            non-test mode it is looked up by ``image_id`` through ``.loc``,
            so it must be indexed by image id.
        train_mode: when true, training augmentations are applied.
        test_mode: when true, items are addressed by position in ``img_df``
            and only the image tensor is returned.
    """

    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode
        # Augmentation pipeline, created on first use and then reused so it is
        # not rebuilt for every single sample.
        self._augs = None

    @staticmethod
    def _decode(pixels):
        """Reshape a flat pixel row to (HEIGHT, WIDTH) uint8 and invert it."""
        return 255 - pixels.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __getitem__(self, idx):
        """Return the image tensor, plus a 3-element label tensor outside test mode."""
        if self.test_mode:
            row = None
            img = self._decode(self.img_df.iloc[idx].values)
        else:
            row = self.df.iloc[idx]
            img = self.get_img(row.image_id)

        if self.train_mode:
            if self._augs is None:
                self._augs = get_train_augs()
            img = self._augs(image=img)['image']

        # Add a trailing channel axis (H, W) -> (H, W, 1) before tensor conversion.
        img = np.expand_dims(img, axis=-1)
        img = transforms.functional.to_tensor(img)
        # ImageNet mean/std normalisation is intentionally not applied here.

        if self.test_mode:
            return img
        labels = torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic])
        return img, labels

    def get_img(self, img_id):
        """Return the inverted (HEIGHT, WIDTH) uint8 image stored under ``img_id``."""
        return self._decode(self.img_df.loc[img_id].values)

    def __len__(self):
        # Without a label frame the image frame defines the dataset size.
        if self.df is None:
            return len(self.img_df)
        return len(self.df)
    

In [17]:
def get_test_loader(batch_size=4, idx=0):
    """Create a sequential DataLoader over a single test parquet shard.

    Args:
        batch_size: number of images per batch.
        idx: shard number; selects ``test_image_data_{idx}.parquet`` in DATA_DIR.

    Returns:
        A non-shuffling ``DataLoader`` in test mode (images only) with an
        extra ``num`` attribute holding the number of images in the shard.
    """
    # Shards are read one at a time rather than concatenated into one frame.
    shard_path = f'{DATA_DIR}/test_image_data_{idx}.parquet'
    shard_images = pd.read_parquet(shard_path).set_index('image_id')

    dataset = BengaliDataset(None, shard_images, train_mode=False, test_mode=True)
    data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
        drop_last=False,
    )
    data_loader.num = len(dataset)
    return data_loader

In [18]:
# Smoke test: build a loader over shard 0 with the default batch size and
# print the shape and contents of the first batch only.
test_loader = get_test_loader()

for img in test_loader:
    print(img.size())
    print(img)
    break

torch.Size([3, 1, 137, 236])
tensor([[[[0.0314, 0.0078, 0.0078,  ..., 0.0118, 0.0118, 0.0118],
          [0.0275, 0.0118, 0.0118,  ..., 0.0118, 0.0235, 0.0157],
          [0.0353, 0.0157, 0.0157,  ..., 0.0118, 0.0118, 0.0078],
          ...,
          [0.0118, 0.0078, 0.0118,  ..., 0.0157, 0.0157, 0.0157],
          [0.0118, 0.0078, 0.0078,  ..., 0.0157, 0.0078, 0.0078],
          [0.0196, 0.0157, 0.0118,  ..., 0.0078, 0.0118, 0.0196]]],


        [[[0.0078, 0.0078, 0.0078,  ..., 0.0235, 0.0196, 0.0235],
          [0.0039, 0.0078, 0.0078,  ..., 0.0314, 0.0235, 0.0196],
          [0.0078, 0.0078, 0.0118,  ..., 0.0157, 0.0235, 0.0235],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0039, 0.0039, 0.0039,  ..., 0.0000, 0.0000, 0.0000]]],


        [[[0.0078, 0.0078, 0.0078,  ..., 0.0471, 0.0196, 0.0745],
          [0.0078, 0.0078, 0.0078,  ..., 0.0235, 0.0275, 0.0392],
         

# model

In [8]:
#import pretrainedmodels

In [10]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [81]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [82]:
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained=False).cuda()


In [83]:
#model.features(torch.randn((2, 3, 137, 236)).cuda()).size()

torch.Size([2, 2048, 5, 8])

In [84]:
#model.last_linear.in_features

2048

In [19]:
class BengaliNet(nn.Module):
    """Single-channel classifier with one joint output layer for three targets.

    A 1x1 convolution lifts the single input channel to three channels, the
    ``pretrainedmodels`` backbone named by ``backbone_name`` (created with
    ``pretrained='imagenet'``) extracts features, and one linear layer emits
    168 + 11 + 7 = 186 logits (grapheme root, vowel, consonant).
    """

    def __init__(self, backbone_name):
        super().__init__()
        self.n_grapheme, self.n_vowel, self.n_consonant = 168, 11, 7
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant

        # 1 -> 3 channel adapter in front of the backbone.
        self.conv0 = nn.Conv2d(1, 3, kernel_size=1, stride=1, padding=0)

        build_backbone = pretrainedmodels.__dict__[backbone_name]
        self.backbone = build_backbone(num_classes=1000, pretrained='imagenet')
        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        feature_dim = self.backbone.last_linear.in_features
        self.fc = nn.Linear(feature_dim, self.num_classes)

    def logits(self, x):
        """Pool a feature map, apply dropout (training only) and classify."""
        pooled = F.dropout2d(self.avg_pool(x), p=0.2, training=self.training)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

    def forward(self, x):
        """Return the joint logits for a batch of single-channel images."""
        features = self.backbone.features(self.conv0(x))
        return self.logits(features)

In [20]:
MODEL_DIR = './models'
def create_model(args):
    """Build a BengaliNet and restore its checkpoint when one exists.

    Args:
        args: namespace-like object providing ``backbone`` (backbone name,
            also used as the checkpoint sub-directory under MODEL_DIR),
            ``ckp_name`` (checkpoint file name) and ``predict`` (when truthy,
            a missing checkpoint is treated as an error).

    Returns:
        tuple: ``(model, model_file)`` — the model (not moved to any device
        here) and the checkpoint path it was, or would be, loaded from.

    Raises:
        AttributeError: if ``args.predict`` is truthy and the checkpoint
            file does not exist.
    """
    model = BengaliNet(backbone_name=args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    # exist_ok avoids the race between a separate exists() check and makedirs().
    os.makedirs(os.path.dirname(model_file), exist_ok=True)

    has_checkpoint = os.path.exists(model_file)
    print('model file: {}, exist: {}'.format(model_file, has_checkpoint))

    if args.predict and not has_checkpoint:
        raise AttributeError('model file does not exist: {}'.format(model_file))

    if has_checkpoint:
        print('loading {}...'.format(model_file))
        # Deserialize onto the CPU so a checkpoint saved from a GPU also loads
        # on CPU-only hosts or hosts with a different GPU layout; the caller
        # decides afterwards which device the model is moved to.
        state_dict = torch.load(model_file, map_location='cpu')
        model.load_state_dict(state_dict)

    return model, model_file

In [21]:
# Inference configuration: which backbone/checkpoint to restore.
args = Namespace()
args.backbone = 'se_resnext50_32x4d'
args.ckp_name = 'best_model.pth'
# predict=True makes create_model raise if the checkpoint file is missing.
args.predict = True

# create_model returns (model, model_file); keep the model and move it to the GPU.
model = create_model(args)[0].cuda()

model file: ./models/se_resnext50_32x4d/best_model.pth, exist: True
loading ./models/se_resnext50_32x4d/best_model.pth...


# predict

In [25]:
def predict(model, test_loader, split_sizes=(168, 11, 7)):
    """Run inference and return the arg-max class index per output head.

    Args:
        model: ``nn.Module`` returning, per sample, the logits of all heads
            concatenated along dimension 1.
        test_loader: iterable yielding batches of input tensors.
        split_sizes: number of logits belonging to each head, in order. The
            default matches grapheme root / vowel / consonant (168, 11, 7).

    Returns:
        tuple of numpy integer arrays, one per head, each holding the
        predicted class index of every sample in loader order. For an empty
        loader every array is empty.
    """
    model.eval()

    # Feed batches to the device the model actually lives on instead of
    # assuming the default CUDA device; fall back to CPU for parameter-less models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    per_head = [[] for _ in split_sizes]
    with torch.no_grad():
        for x in test_loader:
            outputs = model(x.to(device))
            heads = torch.split(outputs, list(split_sizes), dim=1)
            for collected, head_logits in zip(per_head, heads):
                collected.append(torch.argmax(head_logits, dim=1))

    def _to_numpy(chunks):
        # torch.cat rejects an empty list, so handle the empty-loader case explicitly.
        if not chunks:
            return torch.empty(0, dtype=torch.long).numpy()
        return torch.cat(chunks, 0).cpu().numpy()

    return tuple(_to_numpy(chunks) for chunks in per_head)
            

In [23]:
# Scratch check: concatenating two (3, 4) arrays along axis 0 stacks them
# row-wise into a (6, 4) array.
t1 = np.arange(12).reshape(3,4)
t2 = np.arange(12).reshape(3,4)
np.concatenate([t1, t2], 0)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [24]:
import gc

# Per-shard predictions for the three targets, concatenated after the loop.
preds0, preds1, preds2 = [], [], []

# Four test shards (test_image_data_0..3.parquet) are processed one at a time
# so only a single shard is held in memory.
for i in range(4):
    test_loader = get_test_loader(batch_size=128, idx=i)
    p0, p1, p2 = predict(model, test_loader)
    preds0.append(p0)
    preds1.append(p1)
    preds2.append(p2)
    # Drop the loader (and the shard it references) before reading the next one.
    del test_loader
    gc.collect()
    
# Flatten the per-shard arrays into one array per target, in shard order.
preds0 = np.concatenate(preds0, 0)
preds1 = np.concatenate(preds1, 0)
preds2 = np.concatenate(preds2, 0)

In [24]:
#preds = predict(model, test_loader)

In [26]:
# Build the long-format submission: three rows per image, one per target.
# NOTE(review): row ids are synthesized from the prediction position `i`,
# which assumes the images are Test_0..Test_{N-1} in exactly the order the
# shards were read — confirm against the image_id index of the parquet files.
row_id = []
target = []
for i in tqdm(range(len(preds0))):
    row_id += [f'Test_{i}_grapheme_root', f'Test_{i}_vowel_diacritic',
               f'Test_{i}_consonant_diacritic']
    target += [preds0[i], preds1[i], preds2[i]]
submission_df = pd.DataFrame({'row_id': row_id, 'target': target})
# Writing the file is currently disabled; only the frame is previewed.
#submission_df.to_csv('submission.csv', index=False)
submission_df.head()

100%|██████████| 12/12 [00:00<00:00, 33047.70it/s]


Unnamed: 0,row_id,target
0,Test_0_grapheme_root,3
1,Test_0_vowel_diacritic,0
2,Test_0_consonant_diacritic,0
3,Test_1_grapheme_root,93
4,Test_1_vowel_diacritic,2


In [29]:
submission_df.shape

(36, 2)

In [None]:
#np.random.beta(1,1)

In [25]:
# Sanity check: one grapheme-root prediction per test image.
# (`preds` is never assigned in this notebook; `preds0` is the equivalent array.)
preds0.shape

(12,)