In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp

In [2]:
!ls /mnt/chicm/data/bengali

bengaliai-cv19.zip	   test_image_data_3.parquet
class_map.csv		   train.csv
sample_submission.csv	   train_image_data_0.parquet
test.csv		   train_image_data_1.parquet
test_image_data_0.parquet  train_image_data_2.parquet
test_image_data_1.parquet  train_image_data_3.parquet
test_image_data_2.parquet


In [4]:
!ls /home/chec/data/bengali

ls: cannot access '/home/chec/data/bengali': No such file or directory


In [3]:
DATA_DIR = '/mnt/chicm/data/bengali'

In [4]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [5]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [30]:
train_df[train_df.grapheme_root==0].head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
2299,Train_2299,0,0,0,ং
2731,Train_2731,0,0,0,ং
3147,Train_3147,0,0,0,ং
3648,Train_3648,0,0,0,ং
4187,Train_4187,0,0,0,ং


In [9]:
train_df.shape

(200840, 5)

In [10]:
len(train_df.image_id.unique())

200840

In [10]:
train_df.vowel_diacritic.value_counts()

0     41508
1     36886
7     28723
2     25967
4     18848
3     16152
9     16032
5      5297
6      4336
10     3563
8      3528
Name: vowel_diacritic, dtype: int64

In [11]:
train_df.consonant_diacritic.value_counts()

0    125278
2     23465
5     21397
4     21270
1      7424
6      1387
3       619
Name: consonant_diacritic, dtype: int64

In [82]:
train_df[train_df.grapheme_root==0].grapheme_root.value_counts()

0    147
Name: grapheme_root, dtype: int64

In [137]:
torch.tensor([]).size()

torch.Size([0])

In [135]:
torch.tensor(np.array(list(set(train_df[train_df.consonant_diacritic==0].grapheme_root.values)))).size()

torch.Size([168])

In [160]:
consonant_dict = {}
for i in range(7):
    consonant_dict[i] = torch.tensor(np.array(list(set(range(168)) - set(train_df[train_df.consonant_diacritic==i].grapheme_root.values)))).long()

In [161]:
consonant_dict

{0: tensor([], dtype=torch.int64),
 1: tensor([  0,   1,   2,   5,   7,   8,  10,  11,  12,  14,  15,  16,  17,  18,
          19,  20,  21,  24,  25,  26,  27,  28,  30,  31,  32,  33,  34,  35,
          36,  37,  39,  40,  41,  43,  44,  45,  46,  47,  49,  50,  51,  52,
          53,  54,  56,  57,  58,  59,  60,  61,  62,  63,  65,  66,  67,  68,
          69,  70,  71,  73,  74,  75,  76,  77,  78,  80,  81,  82,  83,  84,
          85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  97,  98,  99,
         100, 101, 102, 104, 105, 106, 108, 109, 110, 111, 112, 114, 115, 116,
         117, 118, 119, 120, 121, 122, 124, 125, 126, 127, 128, 129, 130, 131,
         132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
         146, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 160, 161,
         162, 163, 164, 165, 166, 167]),
 2: tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  14,
          15,  16,  17,  18,  19,  20,  24,  25, 

In [158]:
vowel_dict = {}
for i in range(11):
    vowel_dict[i] = torch.tensor(np.array(list(set(range(168)) - set(train_df[train_df.vowel_diacritic==i].grapheme_root.values)))).long()

In [159]:
vowel_dict

{0: tensor([ 33,  34, 163,  73, 108,  82, 114, 152,  26, 158,  28, 157, 126]),
 1: tensor([  0,   1, 130,   3,   4,   5,   6,   7,   8, 131,  10,  11,  12, 137,
         138, 145, 146,  19,  20,  26,  27, 154,  30, 158, 160,  33, 164,  37,
         166,  45,  51,  63,  68,  80,  82,  84,  87,  90,  93, 102, 104, 105,
         108, 110, 126]),
 2: tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12, 135,
         130, 143, 144, 145,  19,  20, 146,  24,  26, 154, 157, 158, 161, 162,
         163, 164, 166,  41,  45,  46,  49,  60,  63,  67,  73,  78,  80,  82,
          87,  97,  99, 100, 102, 104, 105, 106, 114, 116, 121, 126]),
 3: tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  14,
          15,  16,  17,  19,  24,  26,  27,  28,  29,  30,  31,  33,  34,  35,
          37,  39,  40,  41,  42,  45,  47,  48,  49,  50,  51,  54,  58,  60,
          61,  63,  67,  73,  75,  77,  78,  80,  83,  84,  87,  94,  97,  98,
          99, 101, 102, 1

In [162]:
torch.save({
    'vowel_dict': vowel_dict,
    'consonant_dict': consonant_dict
}, 'post_process.pth')

In [96]:
t = torch.load('post_process.pth')
t

{'vowel_dict': {1: tensor([  0,   1, 130,   3,   4,   5,   6,   7,   8, 131,  10,  11,  12, 137,
          138, 145, 146,  19,  20,  26,  27, 154,  30, 158, 160,  33, 164,  37,
          166,  45,  51,  63,  68,  80,  82,  84,  87,  90,  93, 102, 104, 105,
          108, 110, 126]),
  2: tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12, 135,
          130, 143, 144, 145,  19,  20, 146,  24,  26, 154, 157, 158, 161, 162,
          163, 164, 166,  41,  45,  46,  49,  60,  63,  67,  73,  78,  80,  82,
           87,  97,  99, 100, 102, 104, 105, 106, 114, 116, 121, 126]),
  3: tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  14,
           15,  16,  17,  19,  24,  26,  27,  28,  29,  30,  31,  33,  34,  35,
           37,  39,  40,  41,  42,  45,  47,  48,  49,  50,  51,  54,  58,  60,
           61,  63,  67,  73,  75,  77,  78,  80,  83,  84,  87,  94,  97,  98,
           99, 101, 102, 104, 105, 106, 108, 110, 111, 112, 114, 121, 126, 127

In [87]:
set(range(10)) - set([5, 2])

{0, 1, 3, 4, 6, 7, 8, 9}

In [32]:
len(train_df[train_df.vowel_diacritic==0].grapheme_root.value_counts())

155

In [44]:
for i in range(1, 7):
    print(i)

1
2
3
4
5
6


In [154]:
x0 = torch.randn((4, 168))
x0

tensor([[-5.7090e-01,  6.7841e-01,  2.1868e-01,  9.2348e-01, -5.6974e-01,
          7.5332e-01,  8.8118e-01, -1.0677e+00,  3.9017e-01,  4.7894e-01,
         -5.7618e-01,  1.1432e-01,  1.6012e+00,  1.8910e-01,  1.7323e+00,
         -2.3128e+00,  8.4238e-01,  6.4054e-02,  2.3215e-01, -1.0059e+00,
         -9.5802e-01, -2.3800e+00,  1.4862e+00, -3.4377e-01, -7.6178e-01,
          9.9783e-01, -2.8732e-01,  1.4430e+00,  1.0123e+00,  2.4184e+00,
          1.1962e+00,  3.5566e-02,  7.0873e-01,  1.0308e+00, -1.5516e+00,
         -1.6503e+00,  3.7555e-01, -3.4377e-01, -1.6109e+00,  9.7220e-01,
          7.8631e-01,  1.6241e-01,  8.4838e-01,  5.8348e-01, -8.6711e-01,
         -2.7581e-01,  3.1388e-01, -6.1546e-01, -1.3130e+00, -1.0159e+00,
          1.1954e+00,  2.1349e-01,  1.4771e-01, -2.8559e-01,  5.9921e-01,
         -1.7303e-01,  1.5060e+00, -5.6738e-01,  2.9321e-01,  3.8793e-01,
         -1.7636e+00, -8.8412e-01, -1.1644e+00, -3.2564e-01,  1.0800e+00,
         -1.4333e+00, -7.6324e-01,  4.

In [163]:
score, x1 = torch.max(torch.randn(4, 7), dim=1)
x1

tensor([5, 2, 0, 4])

In [164]:
score

tensor([1.2782, 0.3687, 2.0671, 1.5945])

In [111]:
x1[0].item()

5

In [152]:
consonant_dict[0].long()

tensor([], dtype=torch.int64)

In [155]:
for i in range(x0.size(0)):
    x0[i, consonant_dict[x1[i].item()].long()] = -100000.

In [156]:
x0

tensor([[-5.7090e-01,  6.7841e-01,  2.1868e-01,  9.2348e-01, -5.6974e-01,
          7.5332e-01,  8.8118e-01, -1.0677e+00,  3.9017e-01,  4.7894e-01,
         -5.7618e-01,  1.1432e-01,  1.6012e+00,  1.8910e-01,  1.7323e+00,
         -2.3128e+00,  8.4238e-01,  6.4054e-02,  2.3215e-01, -1.0059e+00,
         -9.5802e-01, -2.3800e+00,  1.4862e+00, -3.4377e-01, -7.6178e-01,
          9.9783e-01, -2.8732e-01,  1.4430e+00,  1.0123e+00,  2.4184e+00,
          1.1962e+00,  3.5566e-02,  7.0873e-01,  1.0308e+00, -1.5516e+00,
         -1.6503e+00,  3.7555e-01, -3.4377e-01, -1.6109e+00,  9.7220e-01,
          7.8631e-01,  1.6241e-01,  8.4838e-01,  5.8348e-01, -8.6711e-01,
         -2.7581e-01,  3.1388e-01, -6.1546e-01, -1.3130e+00, -1.0159e+00,
          1.1954e+00,  2.1349e-01,  1.4771e-01, -2.8559e-01,  5.9921e-01,
         -1.7303e-01,  1.5060e+00, -5.6738e-01,  2.9321e-01,  3.8793e-01,
         -1.7636e+00, -8.8412e-01, -1.1644e+00, -3.2564e-01,  1.0800e+00,
         -1.4333e+00, -7.6324e-01,  4.

In [125]:
t1 = np.random.rand(4,168)
t1

array([[7.08282598e-01, 1.27083821e-01, 3.12678784e-01, 6.12507118e-01,
        4.36557678e-01, 5.70243515e-02, 2.00852664e-01, 8.40132137e-01,
        1.18369322e-01, 2.19194193e-01, 8.67025576e-01, 4.43840599e-01,
        8.47827325e-02, 1.53244331e-01, 3.10607041e-01, 4.48082775e-01,
        4.65979102e-01, 7.31390930e-01, 4.23772103e-01, 3.76272053e-01,
        6.33526797e-02, 4.45813879e-01, 6.29269720e-01, 3.76853777e-01,
        9.45934518e-01, 1.49456631e-01, 8.60526326e-01, 6.03383075e-01,
        7.42606734e-01, 2.29736642e-01, 2.21900139e-01, 8.72239381e-01,
        6.47834511e-01, 1.28407796e-01, 7.72576984e-01, 7.80999724e-01,
        3.00963432e-01, 6.23477358e-01, 5.97506116e-01, 6.66386147e-02,
        1.52603187e-01, 9.39119461e-02, 7.34306275e-01, 6.72842573e-01,
        2.25346134e-01, 7.31676393e-01, 9.93317728e-01, 1.55663783e-01,
        3.68474323e-01, 4.33920503e-01, 9.82081705e-01, 3.24330606e-01,
        9.99862456e-01, 9.41854675e-01, 4.28742002e-01, 1.285912

In [126]:
for i in range(t1.shape[0]):
    t1[i, vowel_dict[x1[i].item()].numpy()] = -100000.

In [129]:
x1 = np.random.rand(4, 11)

In [130]:
x1

array([[0.02286843, 0.94285136, 0.38830277, 0.60471556, 0.36469541,
        0.16311125, 0.44643639, 0.37814579, 0.76290386, 0.27457108,
        0.24688847],
       [0.67822297, 0.3942885 , 0.98698139, 0.74173984, 0.26378129,
        0.92764697, 0.57445771, 0.71490624, 0.36718408, 0.20250672,
        0.13880726],
       [0.4498732 , 0.86932675, 0.39903334, 0.58982651, 0.64100339,
        0.20068728, 0.22134874, 0.32741683, 0.77794367, 0.76240691,
        0.66784187],
       [0.90776698, 0.71308756, 0.42697915, 0.36056423, 0.5571547 ,
        0.05825743, 0.80948574, 0.75621422, 0.67206096, 0.66552588,
        0.14909721]])

In [133]:
np.argmax(x1, 1)[0]

1

In [68]:
torch.save({1: x}, 'tmpx')

In [69]:
t = torch.load('tmpx')

In [70]:
t

{1: tensor([-2.0961,  5.0000,  5.0000,  5.0000,  0.6562,  1.3608,  1.2173,  0.9374,
          5.0000,  1.9926,  0.6663, -0.8623])}

In [6]:
df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet')
df.head()

Unnamed: 0,image_id,0,1,2,3,4,5,6,7,8,...,32322,32323,32324,32325,32326,32327,32328,32329,32330,32331
0,Train_0,254,253,252,253,251,252,253,251,251,...,253,253,253,253,253,253,253,253,253,251
1,Train_1,251,244,238,245,248,246,246,247,251,...,255,255,255,255,255,255,255,255,255,254
2,Train_2,251,250,249,250,249,245,247,252,252,...,254,253,252,252,253,253,253,253,251,249
3,Train_3,247,247,249,253,253,252,251,251,250,...,254,254,254,254,254,253,253,252,251,252
4,Train_4,249,248,246,246,248,244,242,242,229,...,255,255,255,255,255,255,255,255,255,255


In [15]:
df.shape

(50210, 32333)

In [8]:
df['1']

0        253
1        244
2        250
3        247
4        248
        ... 
50205    250
50206    251
50207    245
50208    242
50209    255
Name: 1, Length: 50210, dtype: uint8

In [9]:
from sys import getsizeof

In [11]:
getsizeof(df) / (1024*1024)

1551.81369972229

In [25]:
datafile = f'{DATA_DIR}/train_image_data_0.parquet'
parq = pq.read_pandas(datafile, columns=[str(x) for x in range(32332)]).to_pandas()

In [26]:
getsizeof(parq) / (1024*1024)

1548.5681838989258

In [30]:
x = parq.values

In [34]:
del parq
gc.collect()

173

In [37]:
x1 = x.copy()

In [38]:
getsizeof(x1) / (1024*1024)

1548.185188293457

In [39]:
x1.shape

(50210, 32332)

In [40]:
imgs = torch.zeros(50210, HEIGHT * WIDTH, dtype=torch.uint8)
getsizeof(imgs) / (1024*1024)

7.62939453125e-05

In [41]:
for idx, image in enumerate(x1):
    #image = (image * (255.0 / image.max())).astype(np.uint8)
    imgs[idx, ...] = torch.from_numpy(image.reshape(-1).astype(np.uint8))
    #img_id = img_id + 1

In [42]:
getsizeof(imgs) / (1024*1024)

7.62939453125e-05

In [12]:
BATCH_SIZE = 96
N_WORKERS = 4

HEIGHT = 137
WIDTH = 236
TARGET_SIZE = 128
PADDING = 8

# Replace these to your values
MEAN = 0.0778441
STD = 0.216016

In [13]:
def bbox(img):
    rows = np.any(img, axis=1)
    cols = np.any(img, axis=0)
    rmin, rmax = np.where(rows)[0][[0, -1]]
    cmin, cmax = np.where(cols)[0][[0, -1]]
    return rmin, rmax, cmin, cmax


def crop_resize(img0, size=TARGET_SIZE, pad=64):
    # crop a box around pixels large than the threshold
    # some images contain line at the sides
    ymin, ymax, xmin, xmax = bbox(img0[5:-5, 5:-5] > 80)

    # cropping may cut too much, so we need to add it back
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT
    img = img0[ymin:ymax, xmin:xmax]

    # remove lo intensity pixels as noise
    img[img < 28] = 0
    lx, ly = xmax - xmin, ymax - ymin
    ls = max(lx, ly) + pad

    # make sure that the aspect ratio is kept in rescaling
    img = np.pad(img, [((ls - ly) // 2,), ((ls - lx) // 2,)], mode='constant')

    return cv2.resize(img, (size, size))

In [17]:
from torch.utils.data import Dataset
from torch.utils.data.sampler import SequentialSampler
import pyarrow.parquet as pq

class BengaliParquetDataset(Dataset):

    def __init__(self, num_samples=1):
        
        self.num_samples = num_samples
        self.images = torch.zeros(num_samples, TARGET_SIZE * TARGET_SIZE, dtype=torch.uint8)
        img_id = 0

        for i in range(1):
            #datafile = INPUT_PATH + '/test_image_data_{}.parquet'.format(i)
            datafile = f'{DATA_DIR}/train_image_data_0.parquet'
            parq = pq.read_pandas(datafile, columns=[str(x) for x in range(32332)]).to_pandas()
            parq = 255 - parq.iloc[:, :].values.reshape(-1, HEIGHT, WIDTH).astype(np.uint8)
            
            # Not enough memory to do this using a large batch
            # parq = (parq * (255.0 / parq.max(axis=(1,2), keepdims=True))).astype(np.uint8)

            for idx, image in enumerate(parq):
                image = (image * (255.0 / image.max())).astype(np.uint8)
                self.images[img_id, ...] = torch.from_numpy(crop_resize(image, size=TARGET_SIZE, pad=PADDING).reshape(-1).astype(np.uint8))
                img_id = img_id + 1
                
        del parq
        
    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]
        img = img.view(TARGET_SIZE, TARGET_SIZE)
        img = img.unsqueeze(0)

        return img, idx


In [18]:
bengali_dataset = BengaliParquetDataset(num_samples = 50210)

In [23]:
bengali_dataset.images.shape

torch.Size([50210, 16384])

In [24]:
50210*16384 / (1024*1024)

784.53125

In [13]:
df.shape

(50210, 32333)

In [165]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [179]:
backbone = pretrainedmodels.__dict__['dpn68'](num_classes=1000, pretrained='imagenet')

In [184]:
backbone.last_linear.in_channels

832

In [183]:
y = backbone.features(torch.randn(2,3,224,224))
y.size()

torch.Size([2, 832, 7, 7])

In [2]:
x = torch.randn(2,1,128,128)
x.size()

torch.Size([2, 1, 128, 128])

In [7]:
F.interpolate(x, size=(224,224), mode='bilinear', align_corners=False).size()

torch.Size([2, 1, 224, 224])