In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk /kaggle/input recursively and print the full path of every file found,
# so the datasets and checkpoints attached to this kernel show up in the output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

/kaggle/input/modelv2/whole_grapheme.pth
/kaggle/input/bengaliai-cv19/test_image_data_2.parquet
/kaggle/input/bengaliai-cv19/test_image_data_3.parquet
/kaggle/input/bengaliai-cv19/test_image_data_0.parquet
/kaggle/input/bengaliai-cv19/train.csv
/kaggle/input/bengaliai-cv19/test_image_data_1.parquet
/kaggle/input/bengaliai-cv19/class_map.csv
/kaggle/input/bengaliai-cv19/train_image_data_3.parquet
/kaggle/input/bengaliai-cv19/train_image_data_2.parquet
/kaggle/input/bengaliai-cv19/test.csv
/kaggle/input/bengaliai-cv19/sample_submission.csv
/kaggle/input/bengaliai-cv19/train_image_data_1.parquet
/kaggle/input/bengaliai-cv19/train_image_data_0.parquet
/kaggle/input/se-resnext50-32x4d-fold2/se_resnext50_32x4d_fold2.pkl
/kaggle/input/densenet121/densenet121-a639ec97.pth
/kaggle/input/kerasefficientnetb3/Train1_model_70.h5
/kaggle/input/kerasefficientnetb3/efficientnet-1.0.0-py3-none-any.whl
/kaggle/input/kerasefficientnetb3/Train1_model_64.h5
/kaggle/input/kerasefficientnetb3/Train1_model_57

In [2]:
# Packages
import torch
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
from tqdm import tqdm

import gc
import os
import random
import sys
import albumentations as A
import pyarrow
import cv2
import itertools

import six
from torch.utils.data import Dataset


from torch import nn
from torch.nn.parameter import Parameter
import torch.nn.functional as F
from torch.nn import Sequential
from torchvision import models
import torchvision


# Our Model

In [3]:
# Data Processing Parameters
HEIGHT = 137  # number of pixel rows in a raw image (used as a crop bound in crop_resize)
WIDTH = 236   # number of pixel columns in a raw image (used as a crop bound in crop_resize)
SIZE = 128    # default output side length of crop_resize

# Checkpoint location relative to '/kaggle/input/'; the prefix is prepended when the weights are loaded.
model_name = "modelv2/whole_grapheme.pth"

## Define our dataset classes

In [4]:
# wrapper
class DatasetMixin(Dataset):
    """Base dataset adding slice/list indexing and an optional image transform.

    Subclasses must implement ``__len__`` and ``get_example``. When a subclass
    exposes a ``labels`` attribute that is not ``None``, ``get_example`` is
    expected to return ``(image, label)``; otherwise it returns the image alone.

    Args:
        transform: optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry (the calling convention
            used by the augmentation pipeline in this notebook).
    """

    def __init__(self, transform=None):
        self.transform = transform

    def __getitem__(self, index):
        """Returns an example or a sequence of examples.

        ``index`` may be an int, a tensor, a slice, a list or a NumPy array;
        the last three yield a list of examples.
        """
        if torch.is_tensor(index):
            index = index.tolist()
        if isinstance(index, slice):
            current, stop, step = index.indices(len(self))
            return [self.get_example_wrapper(i)
                    for i in range(current, stop, step)]
        elif isinstance(index, (list, np.ndarray)):
            return [self.get_example_wrapper(i) for i in index]
        else:
            return self.get_example_wrapper(index)

    def __len__(self):
        """Returns the number of data points."""
        raise NotImplementedError

    def get_example_wrapper(self, i):
        """Wrapper of `get_example`, to apply `transform` if necessary.

        Returns:
            ``(image, label)`` for labelled datasets, otherwise the image. The
            image is passed through ``self.transform`` when one is configured.
        """
        example = self.get_example(i)

        # Without a transform the raw example is the result (previously this
        # path fell through and returned None).
        if self.transform is None:
            return example

        # `is not None` rather than truthiness: labels is typically a NumPy
        # array, whose truth value is ambiguous. getattr() lets subclasses that
        # never define `labels` behave as unlabeled datasets.
        if getattr(self, 'labels', None) is not None:
            image, label = example[0], example[1]
            return self.transform(image=image)['image'], label

        # Unlabeled: return the transformed image (previously the transform
        # result was computed and then discarded).
        return self.transform(image=example)['image']

    def get_example(self, i):
        """Returns the i-th example.

        Implementations should override it. It should raise :class:`IndexError`
        if the index is invalid.

        Args:
            i (int): The index of the example.

        Returns:
            The i-th example.

        """
        raise NotImplementedError


In [5]:

# actual class
class BengaliAIDataset(DatasetMixin):
    """Dataset over an indexable collection of images with optional labels.

    Args:
        images: indexable collection of image arrays.
        labels: optional indexable collection aligned with ``images``.
        transform: forwarded to ``DatasetMixin``.
        indices: optional subset/ordering of positions into ``images``;
            defaults to every position in order.
    """

    def __init__(self, images, labels=None, transform=None, indices=None):
        super(BengaliAIDataset, self).__init__(transform=transform)
        self.images = images
        self.labels = labels
        self.train = labels is not None
        self.indices = np.arange(len(images)) if indices is None else indices

    def __len__(self):
        """Number of selected examples."""
        return len(self.indices)

    def get_example(self, i):
        """Return the i-th selected image as float32, with its label if labelled."""
        source_pos = self.indices[i]
        # float32 so later affine-style augmentations can operate on the image
        image = self.images[source_pos].astype(np.float32)
        if not self.train:
            return image
        return image, self.labels[source_pos]

## Define our Data Processing Pipeline

In [6]:
###################################################################
# Load Images
###################################################################

# during submission, we load via parquet
def prepare_image(datadir, data_type, submission=False, indices=[0, 1, 2, 3]):
    """Read the requested image shards and stack them into one array.

    Args:
        datadir: directory holding ``{data_type}_image_data_{i}`` files.
        data_type: ``'train'`` or ``'test'``.
        submission: read ``.parquet`` shards when true, ``.feather`` otherwise.
        indices: shard numbers to load.

    Returns:
        Array of shape ``(n_images, 137, 236)`` built from every column after
        the first of each shard.
    """
    assert data_type in ['train', 'test']

    if submission:
        shard_frames = [
            pd.read_parquet(datadir + f'/{data_type}_image_data_{i}.parquet')
            for i in indices
        ]
    else:
        shard_frames = [
            pd.read_feather(datadir + f'/{data_type}_image_data_{i}.feather')
            for i in indices
        ]
    print('image_df_list', len(shard_frames))

    n_rows, n_cols = 137, 236

    # The first column is skipped; the rest are the flattened pixels.
    shard_arrays = []
    for frame in shard_frames:
        shard_arrays.append(frame.iloc[:, 1:].values.reshape(-1, n_rows, n_cols))

    # Drop the dataframes before concatenating to keep peak memory down.
    del shard_frames
    gc.collect()
    return np.concatenate(shard_arrays, axis=0)

# convert our graphemes to labels
def convertGrapheme(train):
    """Return a copy of ``train`` whose ``grapheme`` column holds integer codes.

    Codes are assigned in order of first appearance of each grapheme.
    """
    code_of = {
        grapheme: code
        for code, grapheme in enumerate(train.grapheme.unique())
    }
    return train.replace({"grapheme": code_of})


In [7]:
###################################################################
# Crop and Resize our images
###################################################################
# bounding box
def bbox(img):
    """Inclusive bounding box ``(rmin, rmax, cmin, cmax)`` of the truthy pixels.

    Raises ``IndexError`` when ``img`` has no truthy pixel.
    """
    occupied_rows = np.flatnonzero(np.any(img, axis=1))
    occupied_cols = np.flatnonzero(np.any(img, axis=0))
    first_row, last_row = occupied_rows[[0, -1]]
    first_col, last_col = occupied_cols[[0, -1]]
    return first_row, last_row, first_col, last_col

def crop_resize(img0, size=SIZE, pad=16):
    """Crop ``img0`` around its bright content, pad to a square and resize.

    Args:
        img0: 2-D image; pixels above 80 count as content.
            # assumes content is bright on a dark background — TODO confirm
        size: side length of the returned square image.
        pad: extra pixels added to the square side before resizing.

    Returns:
        The ``size`` x ``size`` image produced by ``cv2.resize``.

    Note:
        ``img`` below is a slice of ``img0``; if ``img0`` is a NumPy array the
        noise-clearing assignment writes into the caller's array as well.
    """
    #crop a box around pixels larger than the threshold
    #some images contain a line at the sides, hence the 5 px border is ignored
    # NOTE(review): the bbox coordinates are relative to img0[5:-5,5:-5] but are
    # used below to index img0 directly — confirm the 5 px offset is intended.
    ymin,ymax,xmin,xmax = bbox(img0[5:-5,5:-5] > 80)
    
    #cropping may cut too much, so we need to add it back
    #(13 px horizontally, 10 px vertically, clamped to the image bounds)
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT
    img = img0[ymin:ymax,xmin:xmax]
    
    #remove low intensity pixels as noise
    img[img < 28] = 0
    lx, ly = xmax-xmin,ymax-ymin
    # side of the square canvas: the longer crop edge plus padding
    l = max(lx,ly) + pad
    
    #make sure that the aspect ratio is kept in rescaling:
    #zero-pad both axes symmetrically towards an l x l square
    img = np.pad(img, [((l-ly)//2,), ((l-lx)//2,)], mode='constant')
    #img = cv2.normalize(img+255, None, dtype=cv2.CV_32F)
    return cv2.resize(img,(size,size))

def crop_char_image(image, threshold=40./255.):
    """Trim border rows/columns that contain no pixel above ``threshold``.

    Args:
        image: 2-D array.
        threshold: pixels strictly greater than this count as content.

    Returns:
        The cropped view of ``image``; the whole image when nothing exceeds
        the threshold.
    """
    assert image.ndim == 2
    content_mask = image > threshold

    # Which columns / rows hold at least one content pixel.
    col_has_content = np.sum(content_mask, axis=0) > 0
    row_has_content = np.sum(content_mask, axis=1) > 0

    # argmax on a boolean vector gives the first True (0 when there is none),
    # so these are the counts of empty rows/columns on each side.
    empty_rows_before = np.argmax(row_has_content)
    empty_rows_after = np.argmax(row_has_content[::-1])
    empty_cols_before = np.argmax(col_has_content)
    empty_cols_after = np.argmax(col_has_content[::-1])

    n_rows, n_cols = image.shape
    return image[empty_rows_before:n_rows - empty_rows_after,
                 empty_cols_before:n_cols - empty_cols_after]

def resize(image, size=(128, 128)):
    """Resize ``image`` with ``cv2.resize`` using its default interpolation.

    Args:
        image: image array accepted by ``cv2.resize``.
        size: passed straight through as OpenCV's ``dsize`` argument, which
            OpenCV interprets as ``(width, height)``.

    Returns:
        The resized image.
    """
    return cv2.resize(image, size)


# run for all images 
def runCropRsz(images):
    """Crop each image to its content and resize it; return the stacked result.

    Every image goes through ``crop_char_image`` (threshold 40/255) followed by
    ``resize`` with its default target size.
    """
    standardized = [
        resize(crop_char_image(images[position], threshold = 40./255.))
        for position in range(len(images))
    ]
    return np.array(standardized)


In [8]:
# define our augmentations
def augPipeline(P = .75):
    """Build the augmentation pipeline.

    Args:
        P: probability passed to the outer ``A.Compose``.

    Returns:
        An ``A.Compose`` of: additive Gaussian noise, one of two blurs,
        shift/scale/rotate (rotation limited to 15), and one of three
        geometric distortions.
    """
    noise = A.IAAAdditiveGaussianNoise(p=.6)

    blur_choice = A.OneOf([
        A.MedianBlur(blur_limit=3, p=0.6),
        A.Blur(blur_limit=1, p=0.6),
    ], p=0.5)

    # shift and scale limits are left at the library defaults
    affine = A.ShiftScaleRotate( rotate_limit=15, p=.85)

    distortion_choice = A.OneOf([
        A.OpticalDistortion(p=.6),
        A.GridDistortion(p=.4),
        A.IAAPiecewiseAffine(p=.75),
    ], p=.5)

    steps = [noise, blur_choice, affine, distortion_choice]
    return A.Compose(steps, p=P)

In [9]:
###################################################################
# Main Function to Generate and Load the dataset
###################################################################
def genDataset(indices, inputdir, split_num, split_index, data_type=None):
    """Load one slice of the image shards and wrap it in a ``BengaliAIDataset``.

    Args:
        indices: shard numbers to load (forwarded to ``prepare_image``).
        inputdir: directory containing the shard files.
        split_num: number of roughly equal pieces the loaded images are cut into.
        split_index: which of those pieces to process.
        data_type: ``'train'`` or ``'test'``. When omitted, the notebook-level
            ``data_type`` variable is used, which is how this function was
            originally (implicitly) configured.

    Returns:
        An unlabeled ``BengaliAIDataset`` over the cropped and resized images.
        Augmentation is applied with probability 0.5 for ``'train'`` and 0 for
        ``'test'``.

    Raises:
        NameError: if ``data_type`` is neither passed nor defined at module level.
        AssertionError: if ``data_type`` is not ``'train'`` or ``'test'``.
    """
    if data_type is None:
        # Backward compatibility with callers that rely on the global set in a
        # later configuration cell. Prefer passing data_type explicitly.
        try:
            data_type = globals()['data_type']
        except KeyError:
            raise NameError(
                "data_type was not passed and no module-level `data_type` is defined"
            ) from None
    assert data_type in ['train', 'test']

    # Test data is read from parquet (submission mode), train data from feather.
    submission = data_type != "train"
    images = prepare_image(inputdir, data_type=data_type, submission=submission, indices=indices)
    # Keep only the requested slice so a single call never holds the full shard downstream.
    images = np.array_split(images, split_num)[split_index]
    print("~~Loaded Images~~")

    # run our crop and resize functions
    crop_rsz_img = runCropRsz(images)
    print("~~Standardized Images~~")

    # Augment only while training.
    p = .5 if data_type == "train" else 0
    augmentation = augPipeline(p)

    # generate our dataset
    dataset = BengaliAIDataset(crop_rsz_img, transform = augmentation)
    del images, crop_rsz_img
    gc.collect()
    return dataset


## Define Our Model Classes

In [10]:
# this is just a skip connection (very important if we have a ton of layers)
# if we want to concatenate input with the output of the linear layer + activation
def residual_add(lhs, rhs):
    """Add ``rhs`` onto ``lhs`` along dim 1, tolerating different channel counts.

    The result always has ``lhs``'s channel count: surplus ``rhs`` channels are
    dropped, and surplus ``lhs`` channels are passed through unchanged.
    """
    lhs_ch = lhs.shape[1]
    rhs_ch = rhs.shape[1]

    if lhs_ch == rhs_ch:
        return lhs + rhs
    if lhs_ch < rhs_ch:
        return lhs + rhs[:, :lhs_ch]

    # lhs is wider: add over the shared channels, keep the remainder as-is.
    summed_part = lhs[:, :rhs_ch] + rhs
    untouched_part = lhs[:, rhs_ch:]
    return torch.cat([summed_part, untouched_part], dim=1)


# block of linear functions - this is a single layer and can be changed 
# change this if we want to change our functions
class LinearBlock(nn.Module):
    """Linear layer followed by optional batch norm, activation, residual add and dropout.

    Args:
        in_features: input width of the linear layer.
        out_features: output width of the linear layer.
        bias: whether the linear layer has a bias term.
        use_bn: apply ``BatchNorm1d`` to the linear output.
        activation: callable applied after normalisation, or ``None``.
        dropout_ratio: dropout probability; values ``<= 0`` disable dropout.
        residual: add the block input to the output via ``residual_add``.
    """

    def __init__(self, in_features, out_features, bias=True,
                 use_bn=True, activation=F.relu, dropout_ratio=-1, residual=False,):
        super(LinearBlock, self).__init__()

        self.linear = nn.Linear(in_features, out_features, bias=bias)
        if use_bn:
            self.bn = nn.BatchNorm1d(out_features)
        if dropout_ratio > 0.:
            self.dropout = nn.Dropout(p=dropout_ratio)
        else:
            self.dropout = None
        self.activation = activation
        self.use_bn = use_bn
        self.dropout_ratio = dropout_ratio
        self.residual = residual

    # Implemented as `forward` (not `__call__`) so nn.Module's own __call__
    # machinery — forward/backward hooks in particular — keeps working.
    # `block(x)` behaves as before.
    def forward(self, x):
        """Apply linear -> [bn] -> [activation] -> [residual] -> [dropout]."""
        h = self.linear(x)
        if self.use_bn:
            h = self.bn(h)
        if self.activation is not None:
            h = self.activation(h)
        if self.residual:
            h = residual_add(h, x)
        if self.dropout is not None:
            h = self.dropout(h)
        return h


In [11]:
import os

os.environ['TORCH_HOME'] = '/kaggle/input/densenet121/' #setting the environment variable


# core underlying model 
class densenet(nn.Module):
    """DenseNet-121 feature extractor with a three-stage linear head.

    The head first predicts whole-grapheme scores and then feeds them, together
    with the hidden features, into the final output layer.

    Args:
        in_channels: number of channels of the input image (1 for grayscale).
        out_dim: width of the final output.
        use_bn: enable batch norm in the three linear blocks.
        dropout: dropout ratio of the first linear block (``<= 0`` disables it).
    """

    def __init__(self,in_channels = 1,out_dim=10, use_bn=True, dropout = -1):
        super(densenet, self).__init__()

        # Maps the input to the 3 channels the DenseNet stem expects. This now
        # honours `in_channels` (it was hard-coded to 1). Kept inside
        # nn.Sequential so the state-dict key stays `conv0.0.*`.
        self.conv0 = nn.Sequential(nn.Conv2d(in_channels, 3, kernel_size=3, stride=1, padding=1, bias=True))

        # Architecture only (pretrained=False): weights are expected to come
        # from a checkpoint loaded by the caller.
        self.base_model = models.densenet121(pretrained=False)

        inch = self.base_model.classifier.in_features

        # should move to train parameters
        activation = F.leaky_relu
        hdim = 512
        n_total_graphemes = 1285

        self.lin1 = LinearBlock(inch, hdim, use_bn=use_bn, activation=activation,
                                dropout_ratio = dropout, residual=False)

        # predicts the whole grapheme:
        # the out dimension is the number of whole-grapheme classes
        self.lin2 = LinearBlock(hdim, n_total_graphemes, use_bn=use_bn, activation=None, residual=False)

        # the input is the concatenation of the lin1 and lin2 outputs
        # input = hdim + n_total_graphemes; output = out_dim
        self.lin3 = LinearBlock(hdim + n_total_graphemes, out_dim, use_bn=use_bn, activation=None, residual=False)

    def forward(self, x):
        """Return ``(out, h_grapheme)``: final scores and whole-grapheme scores."""
        h = self.conv0(x)
        h = self.base_model.features(h)
        # Sum-pool over the last two (spatial) dimensions of the feature map.
        h = torch.sum(h, dim=(-1, -2))

        h1 = self.lin1(h)
        h_grapheme = self.lin2(h1)
        out = self.lin3(torch.cat((h1, h_grapheme), 1))

        return out, h_grapheme
    



In [12]:
class BengaliClassifier(nn.Module):
    """Wraps a predictor and splits its output into root/vowel/consonant scores.

    Args:
        predictor: module returning either a score tensor of width
            ``n_grapheme + n_vowel + n_consonant`` or a 2-tuple
            ``(scores, whole_grapheme_scores)``.
        n_grapheme: number of grapheme-root classes.
        n_vowel: number of vowel-diacritic classes.
        n_consonant: number of consonant-diacritic classes.
        data_type: ``'train'`` makes ``forward`` compute losses and metrics;
            any other value makes it return the split scores only.
    """

    def __init__(self, predictor, n_grapheme=168, n_vowel=11, n_consonant=7,data_type='train'):
        super(BengaliClassifier, self).__init__()
        self.n_grapheme = n_grapheme
        self.n_vowel = n_vowel
        self.n_consonant = n_consonant
        self.n_total_class = self.n_grapheme + self.n_vowel + self.n_consonant
        self.predictor = predictor
        self.data_type = data_type

        self.metrics_keys = [
            'loss', 'loss_grapheme', 'loss_vowel', 'loss_consonant',
            'acc_grapheme', 'acc_vowel', 'acc_consonant', 'weighted_recall']

    @staticmethod
    def _main_output(pred):
        """Return the class-score tensor from a predictor output (tensor or 2-tuple)."""
        if isinstance(pred, tuple):
            assert len(pred) == 2
            return pred[0]
        return pred

    def forward(self, x, whole_grapheme_loss=False, y=None):
        """Run the predictor and either score or evaluate the batch.

        Args:
            x: input batch for the predictor.
            whole_grapheme_loss: in train mode, also add the cross entropy of
                the predictor's whole-grapheme output against ``y[:, 3]``.
            y: integer targets with columns (root, vowel, consonant
                [, whole grapheme]); required in train mode.

        Returns:
            Train mode: ``(loss, metrics, pred)``. Otherwise the tuple of split
            score tensors ``(root, vowel, consonant)``.

        Raises:
            ValueError: if ``whole_grapheme_loss`` is requested but the
                predictor did not return whole-grapheme scores.
        """
        pred = self.predictor(x)

        # A two-output predictor also returns the whole-grapheme scores.
        pred_grapheme = pred[1] if isinstance(pred, tuple) else None
        pred = self._main_output(pred)

        assert pred.shape[1] == self.n_total_class
        preds = torch.split(pred, [self.n_grapheme, self.n_vowel, self.n_consonant], dim=1)

        if self.data_type != 'train':
            return preds

        # compute our individual losses and generate single loss value
        # TODO: test other loss functions
        loss_grapheme = F.cross_entropy(preds[0], y[:, 0])
        loss_vowel = F.cross_entropy(preds[1], y[:, 1])
        loss_consonant = F.cross_entropy(preds[2], y[:, 2])

        loss = loss_grapheme + loss_vowel + loss_consonant
        if whole_grapheme_loss:
            if pred_grapheme is None:
                raise ValueError(
                    "whole_grapheme_loss=True requires a predictor that returns "
                    "(scores, whole_grapheme_scores)")
            loss = loss + F.cross_entropy(pred_grapheme, y[:, 3])

        # metric summary
        # NOTE(review): `accuracy` and `macro_recall` are not defined in the
        # visible part of this notebook — confirm they exist before training.
        metrics = {
            'loss': loss.item(),
            'loss_grapheme': loss_grapheme.item(),
            'loss_vowel': loss_vowel.item(),
            'loss_consonant': loss_consonant.item(),
            'acc_grapheme': accuracy(preds[0], y[:, 0]),
            'acc_vowel': accuracy(preds[1], y[:, 1]),
            'acc_consonant': accuracy(preds[2], y[:, 2]),
            'weighted_recall': macro_recall(pred, y)
        }

        return loss, metrics, pred

    # run our prediction
    def calc(self, data_loader):
        """Score every batch of ``data_loader`` in eval mode without gradients.

        Returns:
            Tuple of score tensors ``(root, vowel, consonant)`` covering all
            batches in order.
        """
        device: torch.device = next(self.parameters()).device
        self.eval()
        output_list = []
        with torch.no_grad():
            for batch in tqdm(data_loader):
                batch = batch.to(device)
                # Two-output predictors return a tuple; torch.cat below needs
                # plain tensors, so keep only the class scores.
                output_list.append(self._main_output(self.predictor(batch)))
        output = torch.cat(output_list, dim=0)
        preds = torch.split(output, [self.n_grapheme, self.n_vowel, self.n_consonant], dim=1)
        return preds

    # return probabilities
    def predict_proba(self, data_loader):
        """Per-component class probabilities (softmax over each score tensor)."""
        preds = self.calc(data_loader)
        return [F.softmax(p, dim=1) for p in preds]

    # return actual predictions
    def predict(self, data_loader):
        """Per-component predicted class indices (argmax over each score tensor)."""
        preds = self.calc(data_loader)
        pred_labels = [torch.argmax(p, dim=1) for p in preds]
        return pred_labels


## Evaluate Our Models

In [13]:
# define our paths and parameters
inputdir = "/kaggle/input/bengaliai-cv19"

# Parameters
# 'test' selects the parquet test shards and disables augmentation in genDataset.
data_type = 'test'
# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Class counts per target; n_total is the width of the model's combined output.
n_grapheme = 168
n_vowel = 11
n_consonant = 7
n_total = n_grapheme + n_vowel + n_consonant

bs = 32        # DataLoader batch size for inference
split_num = 3  # each loaded shard is processed in this many pieces to limit memory use


In [14]:
# instantiate model
predictor = densenet(in_channels=1, out_dim=n_total, use_bn = True, dropout = -1).to(device)

# map_location remaps the stored tensors onto the active device, so a checkpoint
# saved from a GPU session still loads when `device` falls back to the CPU.
state_dict = torch.load('/kaggle/input/'+ model_name, map_location=device)

# The checkpoint holds the weights of the whole classifier wrapper (predictor included),
# so it is loaded into the BengaliClassifier rather than into the predictor.
classifier = BengaliClassifier(predictor,data_type='test')
classifier.load_state_dict(state_dict)
classifier = classifier.to(device)

In [15]:
# run
# Switch both modules to eval mode before inference.
predictor.eval()
classifier.eval()

# One entry per test image, appended in processing order. Each entry is the raw
# score vector returned by the classifier (no softmax/argmax is applied here).
grapheme_list = []
vowel_list = []
consonant_list = []

# Four shards: test_image_data_0 .. test_image_data_3
for i in range(4):
    indices = [i] # stream datasets to reduce memory
    # NOTE(review): `submission` is not read anywhere in this cell; genDataset
    # derives its own value from data_type.
    submission = True
    
    for j in range(split_num):
        # we then only handle 1/split_num of the shard at a time
        dataset = genDataset(indices, inputdir, split_num = split_num, split_index = j) # generates the dataset class

        # push to data loader (shuffle off so predictions stay in file order)
        test_loader = DataLoader(dataset, batch_size=bs, shuffle = False)

        del dataset
        gc.collect()

        with torch.no_grad():
            for inputs in tqdm(test_loader):
                inputs = inputs.to(device)

                # unsqueeze(1) inserts a size-1 dimension at position 1 (the channel
                # slot of the predictor's first Conv2d); in 'test' mode the
                # classifier returns the three split score tensors.
                grapheme,vowel,consonant = classifier(inputs.unsqueeze(1).float())

                grapheme_list += list(grapheme.cpu().detach().numpy())
                vowel_list += list(vowel.cpu().detach().numpy())
                consonant_list += list(consonant.cpu().detach().numpy())

  0%|          | 0/1 [00:00<?, ?it/s]

image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00,  1.23it/s]


image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 37.34it/s]


image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 45.53it/s]


image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 31.57it/s]


image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 44.97it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 42.22it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 41.77it/s]


image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 44.23it/s]


image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 43.82it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 42.20it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 43.81it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 41.28it/s]


In [16]:
# Flatten the per-image results into submission-style rows: three consecutive
# entries (root, vowel, consonant) per image, with matching ids in `row_id`.
# NOTE(review): ids are generated as Test_{i} from the processing position —
# this presumes the shards list images in Test_0, Test_1, ... order; confirm.
row_id = []
our_preds = []
for i in tqdm(range(len(grapheme_list))):
    row_id += [f'Test_{i}_grapheme_root', f'Test_{i}_vowel_diacritic',
               f'Test_{i}_consonant_diacritic']
    our_preds += [grapheme_list[i], vowel_list[i], consonant_list[i]]
    
#our_preds, row_id

100%|██████████| 12/12 [00:00<00:00, 38216.89it/s]


In [17]:
#np.array(our_preds).shape, row_id

In [18]:
# clean the environment
# Drop the DenseNet model, its checkpoint and the intermediate per-target lists;
# `row_id` and `our_preds` are intentionally kept.
del classifier, test_loader, inputs, grapheme_list, vowel_list, consonant_list, predictor, state_dict
gc.collect()

0

# EfficientNet Model

In [19]:
import os
import time, gc
import numpy as np
import pandas as pd

import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Model, Input
from keras.layers import Dense, Lambda
from math import ceil

# Install EfficientNet
!pip install '../input/kerasefficientnetb3/efficientnet-1.0.0-py3-none-any.whl'
import efficientnet.keras as efn

Using TensorFlow backend.


Processing /kaggle/input/kerasefficientnetb3/efficientnet-1.0.0-py3-none-any.whl
Installing collected packages: efficientnet
Successfully installed efficientnet-1.0.0


In [20]:
# Constants
HEIGHT = 137   # raw image height in pixels
WIDTH = 236    # raw image width in pixels
FACTOR = 0.70  # down-scaling factor applied to both sides
HEIGHT_NEW = int(HEIGHT * FACTOR)  # model input height (truncated to int)
WIDTH_NEW = int(WIDTH * FACTOR)    # model input width (truncated to int)
CHANNELS = 3     # the grayscale image is replicated to this many channels
BATCH_SIZE = 16  # batch size of the test data generator

# Dir (relative to the notebook's working directory)
DIR = '../input/bengaliai-cv19'

In [21]:
# Image Size Summary
print(HEIGHT_NEW)
print(WIDTH_NEW)

# Image Prep
def resize_image(img, WIDTH_NEW, HEIGHT_NEW):
    """Invert, contrast-stretch and down-scale one flattened image.

    Args:
        img: flat array of ``HEIGHT * WIDTH`` pixel values in the 0-255 range.
        WIDTH_NEW: target width in pixels.
        HEIGHT_NEW: target height in pixels.

    Returns:
        Flat ``uint8`` array of length ``HEIGHT_NEW * WIDTH_NEW``.
    """
    # Invert
    img = 255 - img

    # Normalize: stretch so the brightest pixel becomes 255. A blank image has
    # a maximum of 0; skip the scaling there instead of dividing by zero
    # (which would produce inf/NaN before the uint8 cast).
    peak = img.max()
    if peak > 0:
        img = (img * (255.0 / peak)).astype(np.uint8)
    else:
        img = img.astype(np.uint8)

    # Reshape to 2-D using the notebook-level raw image size, then shrink.
    img = img.reshape(HEIGHT, WIDTH)
    image_resized = cv2.resize(img, (WIDTH_NEW, HEIGHT_NEW), interpolation = cv2.INTER_AREA)

    return image_resized.reshape(-1)

95
165


## Create Model

In [22]:
# Generalized mean pool - GeM
# Exponent p of the generalized-mean pooling, initialised to 3.0. It is a
# tf.Variable so create_model can append it to the pooling layer's trainable weights.
gm_exp = tf.Variable(3.0, dtype = tf.float32)
def generalized_mean_pool_2d(X):
    """Generalized-mean (GeM) pooling over axes 1 and 2 of ``X``.

    Computes ``(mean(|X ** p|) + 1e-7) ** (1 / p)`` with ``p = gm_exp``; the
    reduced axes are dropped (``keepdims=False``).
    # assumes X is (batch, height, width, channels) — TODO confirm
    """
    pool = (tf.reduce_mean(tf.abs(X**(gm_exp)), 
                        axis = [1, 2], 
                        keepdims = False) + 1.e-7)**(1./gm_exp)
    return pool

# Create Model
def create_model(input_shape):
    """Build the three-headed EfficientNetB3 classifier (weights not loaded).

    Args:
        input_shape: shape of one input image, without the batch dimension.

    Returns:
        A Keras ``Model`` with softmax outputs ``root`` (168), ``vowel`` (11)
        and ``consonant`` (7), all fed by GeM-pooled backbone features.
    """
    image_input = Input(shape = input_shape)

    # Backbone without the classification top and without built-in pooling.
    backbone = efn.EfficientNetB3(weights = None, include_top = False, input_tensor = image_input, pooling = None, classes = None)

    # Every backbone layer stays trainable.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = True

    # GeM pooling; the exponent is registered as a trainable weight of the layer.
    gem_layer = Lambda(generalized_mean_pool_2d)
    gem_layer.trainable_weights.extend([gm_exp])
    pooled = gem_layer(backbone.output)

    # One softmax head per target, created in root / vowel / consonant order.
    head_specs = [(168, 'root'), (11, 'vowel'), (7, 'consonant')]
    heads = [
        Dense(n_classes, activation = 'softmax', name = head_name)(pooled)
        for n_classes, head_name in head_specs
    ]

    return Model(inputs = backbone.input, outputs = heads)

In [23]:
# Five identical architectures, one per checkpoint of the ensemble.
model1, model2, model3, model4, model5 = [
    create_model(input_shape = (HEIGHT_NEW, WIDTH_NEW, CHANNELS))
    for _ in range(5)
]

In [24]:
# Load one checkpoint per ensemble member; the trailing comments record the
# leaderboard score noted for each checkpoint where known.
# NOTE(review): Train1_model_59.h5 and Train1_model_68.h5 do not appear in the
# input listing printed by the first cell — confirm they exist in the dataset.
model1.load_weights('../input/kerasefficientnetb3/Train1_model_59.h5') # LB 0.9681
model2.load_weights('../input/kerasefficientnetb3/Train1_model_64.h5') # LB 0.9679
#model2.load_weights('../input/kerasefficientnetb3/Train1_model_66.h5') # LB 0.9685
model3.load_weights('../input/kerasefficientnetb3/Train1_model_68.h5') # LB 0.9691
model4.load_weights('../input/kerasefficientnetb3/Train1_model_57.h5') # LB ??
model5.load_weights('../input/kerasefficientnetb3/Train1_model_70.h5') # LB ??

## Data Generator

In [25]:
class TestDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of test images (inputs only).

    Args:
        X: indexable collection of flattened single-channel images, each with
            ``img_size[0] * img_size[1]`` values.
        batch_size: maximum number of images per batch.
        img_size: ``(height, width, channels)`` of one model input.
    """

    def __init__(self, X, batch_size = 16, img_size = (512, 512, 3), *args, **kwargs):
        self.X = X
        self.indices = np.arange(len(self.X))
        self.batch_size = batch_size
        self.img_size = img_size

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        return int(ceil(len(self.X) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as an array of shape ``(n, *img_size)``."""
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        X = self.__data_generation(indices)
        return X

    def __data_generation(self, indices):
        """Assemble the images at ``indices`` into one batch array."""
        # Size the buffer by the number of images actually in this batch: a
        # fixed batch_size buffer left uninitialised rows in the last, partial
        # batch, which were then fed to the model as if they were images.
        X = np.empty((len(indices), *self.img_size))
        channels = self.img_size[-1]

        for i, index in enumerate(indices):
            image = self.X[index]
            # Replicate the single channel, then restore the 2-D layout.
            image = np.stack((image,)*channels, axis=-1)
            X[i,] = image.reshape(self.img_size)

        return X

In [26]:
# Collect EfficientNet ensemble predictions for the submission.
# (Nothing is written to disk in this cell; it fills row_ids2 / efnet_preds.)
tgt_cols = ['grapheme_root','vowel_diacritic','consonant_diacritic']

# row_ids2[k] is the submission row id for efnet_preds[k]; each efnet_preds
# entry is a list with one prediction vector per ensemble model.
row_ids2, efnet_preds = [], []

preds1 = []
preds2 = []
preds3 = []
preds4 = []
preds5 = []

# Loop through Test Parquet files (X)
for i in range(0, 4):
    # Image ids of this shard, in row order
    test_files = []

    # Read Parquet file
    df = pd.read_parquet(os.path.join(DIR, 'test_image_data_'+str(i)+'.parquet'))
    # Get Image Id values
    image_ids = df['image_id'].values
    # Drop Image_id column
    df = df.drop(['image_id'], axis = 1)

    # Loop over rows in Dataframe and generate the resized, flattened images
    X = []
    for image_id, index in zip(image_ids, range(df.shape[0])):
        test_files.append(image_id)
        X.append(resize_image(df.loc[df.index[index]].values, WIDTH_NEW, HEIGHT_NEW))

    # Data_Generator
    data_generator_test = TestDataGenerator(X, batch_size = BATCH_SIZE, img_size = (HEIGHT_NEW, WIDTH_NEW, CHANNELS))
        
    # Predict with all 5 models; each result is indexed by output head first
    # (see preds1[subi] below), then by image.
    preds1 = model1.predict_generator(data_generator_test, verbose = 1)
    preds2 = model2.predict_generator(data_generator_test, verbose = 1)
    preds3 = model3.predict_generator(data_generator_test, verbose = 1)
    preds4 = model4.predict_generator(data_generator_test, verbose = 1)
    preds5 = model5.predict_generator(data_generator_test, verbose = 1)
    
    # NOTE: this inner loop reuses the name `i` of the shard loop above. The
    # outer iteration is unaffected, but `i` means "image position" from here on.
    for i, image_id in zip(range(len(test_files)), test_files):
        
        # One row per (image, target column); subi selects the output head.
        for subi, col in zip(range(len(preds1)), tgt_cols):
            sub_preds1 = preds1[subi]
            sub_preds2 = preds2[subi]
            sub_preds3 = preds3[subi]
            sub_preds4 = preds4[subi]
            sub_preds5 = preds5[subi]

            # Store the five per-model predictions side by side; no averaging
            # happens in this cell.
            row_ids2.append(str(image_id)+'_'+col)
            sub_pred_value = [sub_preds1[i], sub_preds2[i], sub_preds3[i], sub_preds4[i], sub_preds5[i]]
            efnet_preds.append(sub_pred_value)
    
    
    # Cleanup
    del df
    gc.collect()



In [27]:
# Release the five Keras models and their per-shard prediction arrays;
# `row_ids2` and `efnet_preds` are kept.
del model1, model2, model3, model4, model5, preds1, preds2, preds3, preds4, preds5, sub_preds1, sub_preds2, sub_preds3, sub_preds4, sub_preds5
gc.collect()

149530

## SENET Model

In [28]:
import torch
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms as torchtransforms
import cv2
import torch.nn as nn
from tqdm import tqdm_notebook as tqdm
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import time

# Use the first GPU when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# SE-ResNeXt50 checkpoint file and competition data directory.
modelpath = "/kaggle/input/se-resnext50-32x4d-fold2/se_resnext50_32x4d_fold2.pkl"
root_path="/kaggle/input/bengaliai-cv19"

cuda:0


In [29]:
# Validation/test-time preprocessing: convert to a tensor, then normalise each
# of the three channels with a fixed mean and std.
# (These values match the commonly used ImageNet statistics — presumably the
# ones the backbone was trained with; confirm against the training code.)
simple_transform_valid = torchtransforms.Compose([
    torchtransforms.ToTensor(),
    torchtransforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [30]:
class ClsTestDataset(Dataset):
    """Test dataset over a dataframe: first column is the id, the rest are pixels.

    Args:
        df: dataframe with one image per row.
        torchtransforms: callable applied to the PIL image of each sample.
    """

    def __init__(self, df, torchtransforms):
        self.df = df
        self.pathes = self.df.iloc[:,0].values
        self.data = self.df.iloc[:, 1:].values
        self.torchtransforms = torchtransforms

    def __getitem__(self, idx):
        """Return ``(id, transformed_image)`` for row ``idx``."""
        n_rows, n_cols = 137, 236
        sample_id = self.pathes[idx]

        # Restore the 2-D layout and invert the intensities.
        pixels = self.data[idx, :].reshape(n_rows, n_cols).astype(np.uint8)
        inverted = 255 - pixels

        # Expand to three channels, go through PIL, then apply the transform.
        three_channel = cv2.cvtColor(inverted, cv2.COLOR_GRAY2BGR)
        pil_image = torchtransforms.ToPILImage()(three_channel)
        return sample_id, self.torchtransforms(pil_image)

    def __len__(self):
        """Number of rows in the dataframe."""
        return len(self.df)

def make_loader(
        data_folder,
        batch_size=64,
        num_workers=2,
        is_shuffle = False,
):
    """Wrap a test dataframe in a ``DataLoader``.

    Args:
        data_folder: dataframe handed to ``ClsTestDataset`` (despite the name).
        batch_size: samples per batch.
        num_workers: number of loader worker processes.
        is_shuffle: whether to shuffle the samples.

    Returns:
        A ``DataLoader`` with pinned memory over the transformed dataset.
    """
    test_dataset = ClsTestDataset(df = data_folder,
                                  torchtransforms = simple_transform_valid)

    loader_options = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
        shuffle=is_shuffle,
    )
    return DataLoader(test_dataset, **loader_options)

### Model

In [31]:
from __future__ import print_function, division, absolute_import
from collections import OrderedDict
import math

import torch.nn as nn
from torch.utils import model_zoo
__all__ = ['SENet', 'se_resnext50_32x4d']
class SEModule(nn.Module):
    """Squeeze-and-Excitation gate: rescales each channel by a learned factor.

    Args:
        channels: number of input (and output) channels.
        reduction: divisor for the width of the bottleneck between the two
            1x1 convolutions.
    """

    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        squeezed = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, squeezed, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(squeezed, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel sigmoid gate."""
        # squeeze: global average per channel; excite: fc1 -> relu -> fc2 -> sigmoid
        gate = self.sigmoid(self.fc2(self.relu(self.fc1(self.avg_pool(x)))))
        return x * gate


class Bottleneck(nn.Module):
    """
    Shared `forward()` for the bottleneck variants. Subclasses provide
    conv1..conv3, bn1..bn3, relu, se_module and downsample.
    """
    def forward(self, x):
        # Main path: two conv-bn-relu stages, then conv-bn without activation.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Shortcut: identity unless a downsample module is configured.
        shortcut = x if self.downsample is None else self.downsample(x)

        # Gate the main path with the SE module, add the shortcut, activate.
        return self.relu(self.se_module(out) + shortcut)


class SEBottleneck(Bottleneck):
    """
    Bottleneck for SENet154: a 1x1 conv to 2x planes, a grouped 3x3 conv to
    4x planes (carrying the stride), a 1x1 conv, then the SE gate.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super(SEBottleneck, self).__init__()
        mid_channels = planes * 2
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, groups=groups,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNetBottleneck(Bottleneck):
    """
    ResNet bottleneck with a Squeeze-and-Excitation module. As in the Caffe
    implementation, the stride is applied in `conv1` rather than in `conv2`
    (torchvision's ResNet puts it in `conv2`).
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None):
        super(SEResNetBottleneck, self).__init__()
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False,
                               stride=stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1,
                               groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SEResNeXtBottleneck(Bottleneck):
    """
    ResNeXt bottleneck (type C) augmented with a Squeeze-and-Excitation module.

    The inner width is ``floor(planes * base_width / 64) * groups``; the
    grouped 3x3 convolution (``conv2``) carries the stride. This class only
    builds the layers; the forward pass comes from ``Bottleneck`` (defined
    outside this excerpt).
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1,
                 downsample=None, base_width=4):
        super().__init__()
        inner_width = math.floor(planes * (base_width / 64)) * groups
        out_channels = planes * 4

        self.conv1 = nn.Conv2d(inplanes, inner_width, kernel_size=1,
                               bias=False, stride=1)
        self.bn1 = nn.BatchNorm2d(inner_width)

        self.conv2 = nn.Conv2d(inner_width, inner_width, kernel_size=3,
                               stride=stride, padding=1, groups=groups,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(inner_width)

        self.conv3 = nn.Conv2d(inner_width, out_channels, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride


class SENet(nn.Module):
    """
    Squeeze-and-Excitation network assembled from a pluggable bottleneck block.

    Structure: a stem (``layer0``), four residual stages (``layer1`` to
    ``layer4``), average pooling, optional dropout and a linear classifier
    (``last_linear``).

    Args:
        block: bottleneck class. It is called as
            ``block(inplanes, planes, groups, reduction, stride, downsample)``
            and must expose an ``expansion`` class attribute.
        layers: four integers, the number of blocks in ``layer1``..``layer4``.
        groups: forwarded unchanged to every block.
        reduction: forwarded unchanged to every block.
        dropout_p: dropout probability before the classifier; ``None``
            disables dropout.
        inplanes: number of channels produced by the stem.
        input_3x3: if true the stem is three 3x3 convolutions, otherwise a
            single 7x7 convolution.
        downsample_kernel_size: kernel size of the shortcut convolution in
            ``layer2``..``layer4`` (``layer1`` always uses 1).
        downsample_padding: padding of the shortcut convolution in
            ``layer2``..``layer4`` (``layer1`` always uses 0).
        num_classes: number of outputs of ``last_linear``.
    """

    def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
                 inplanes=128, input_3x3=True, downsample_kernel_size=3,
                 downsample_padding=1, num_classes=1000):
        super().__init__()
        self.inplanes = inplanes

        # ---- stem -------------------------------------------------------
        if not input_3x3:
            stem = [
                ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                    padding=3, bias=False)),
                ('bn1', nn.BatchNorm2d(inplanes)),
                ('relu1', nn.ReLU(inplace=True)),
            ]
        else:
            stem = [
                ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
                                    bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
                                    bias=False)),
                ('bn3', nn.BatchNorm2d(inplanes)),
                ('relu3', nn.ReLU(inplace=True)),
            ]
        # `ceil_mode=True` (rather than `padding=1`) keeps the layer
        # compatible with weights converted from Caffe.
        stem.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)))
        self.layer0 = nn.Sequential(OrderedDict(stem))

        # ---- residual stages --------------------------------------------
        # The first stage keeps the resolution and always uses a plain 1x1
        # shortcut; the later stages halve the resolution and share the
        # caller-supplied shortcut configuration.
        self.layer1 = self._make_layer(
            block, planes=64, blocks=layers[0], groups=groups,
            reduction=reduction, downsample_kernel_size=1,
            downsample_padding=0)

        strided_stage = dict(
            stride=2,
            groups=groups,
            reduction=reduction,
            downsample_kernel_size=downsample_kernel_size,
            downsample_padding=downsample_padding,
        )
        self.layer2 = self._make_layer(block, planes=128, blocks=layers[1],
                                       **strided_stage)
        self.layer3 = self._make_layer(block, planes=256, blocks=layers[2],
                                       **strided_stage)
        self.layer4 = self._make_layer(block, planes=512, blocks=layers[3],
                                       **strided_stage)

        # ---- head -------------------------------------------------------
        self.avg_pool = nn.AvgPool2d(7, stride=1)
        if dropout_p is None:
            self.dropout = None
        else:
            self.dropout = nn.Dropout(dropout_p)
        self.last_linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
                    downsample_kernel_size=1, downsample_padding=0):
        """
        Build one stage of ``blocks`` consecutive ``block`` instances.

        Only the first block receives ``stride`` and the shortcut projection;
        the projection is created when the stage changes the resolution or
        the channel count. ``self.inplanes`` is advanced to the stage's
        output width as a side effect.
        """
        stage_width = planes * block.expansion

        shortcut = None
        if stride != 1 or self.inplanes != stage_width:
            shortcut = nn.Sequential(
                nn.Conv2d(self.inplanes, stage_width,
                          kernel_size=downsample_kernel_size, stride=stride,
                          padding=downsample_padding, bias=False),
                nn.BatchNorm2d(stage_width),
            )

        head_block = block(self.inplanes, planes, groups, reduction, stride,
                           shortcut)
        self.inplanes = stage_width
        tail_blocks = [block(self.inplanes, planes, groups, reduction)
                       for _ in range(1, blocks)]

        return nn.Sequential(head_block, *tail_blocks)

    def features(self, x):
        """Run the stem and the four residual stages; return the feature map."""
        for stage in (self.layer0, self.layer1, self.layer2, self.layer3,
                      self.layer4):
            x = stage(x)
        return x

    def logits(self, x):
        """Pool a feature map, apply dropout if configured, and classify."""
        pooled = self.avg_pool(x)
        if self.dropout is not None:
            pooled = self.dropout(pooled)
        flat = pooled.view(pooled.size(0), -1)
        return self.last_linear(flat)

    def forward(self, x):
        """Full forward pass: ``logits(features(x))``."""
        return self.logits(self.features(x))
    
def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'):
    """
    Build an SE-ResNeXt50 (32x4d) network.

    Args:
        num_classes: number of outputs of the final linear layer.
        pretrained: accepted for call compatibility only. This function does
            not load any weights, whatever value is passed.

    Returns:
        A freshly constructed ``SENet`` using ``SEResNeXtBottleneck`` blocks
        in a [3, 4, 6, 3] layout with 32 groups.
    """
    return SENet(
        block=SEResNeXtBottleneck,
        layers=[3, 4, 6, 3],
        groups=32,
        reduction=16,
        dropout_p=None,
        inplanes=64,
        input_3x3=False,
        downsample_kernel_size=1,
        downsample_padding=0,
        num_classes=num_classes,
    )

In [32]:
# Build the SE-ResNeXt50 backbone (no weights are loaded by the factory) and
# adapt its head to this task.
model = se_resnext50_32x4d(pretrained=None)
# Adaptive pooling collapses any feature-map size to 1x1, unlike the default
# fixed 7x7 average pool.
model.avg_pool = nn.AdaptiveAvgPool2d(1)
# One linear head with 186 outputs; getmodeleval splits them 168 / 11 / rest.
model.last_linear = nn.Linear(model.last_linear.in_features, 186)

# Deserialize on the CPU rather than a hard-coded 'cuda:0': this also works on
# CPU-only hosts and avoids a second copy of the weights on the GPU.
# `load_state_dict` copies the values into the model, which is moved to
# `device` afterwards.
modelvalue = torch.load(modelpath, map_location='cpu')
# Drop the first four characters of every checkpoint key so they match this
# model's parameter names.
# NOTE(review): presumably a 4-character wrapper prefix - confirm against the
# checkpoint, since keys without that prefix would be truncated too.
newmodelvalue = {kv[4:]: modelvalue[kv] for kv in modelvalue}
model.load_state_dict(newmodelvalue)
model = model.to(device)

In [33]:
def getmodeleval(model, dataloaders):
    """
    Run ``model`` over every batch of ``dataloaders`` and collect softmax scores.

    The model output is split column-wise into three heads: the first 168
    columns, the next 11, and everything that remains. A softmax is applied
    to each head separately (per row).

    Args:
        model: network called as ``model(img)``; it is put in eval mode here.
        dataloaders: iterable yielding ``(path, img)`` batches.

    Returns:
        ``(pathes, scores1, scores2, scores3)``: the concatenated ``path``
        entries of all batches in iteration order, followed by one NumPy
        array of softmax scores per head (one row per model-output row).
    """
    model.eval()

    head_columns = (
        slice(None, 168),
        slice(168, 168 + 11),
        slice(168 + 11, None),
    )
    per_head_rows = ([], [], [])
    pathes = []

    for path, img in tqdm(dataloaders):
        img = img.to(device)
        pathes.extend(path)
        with torch.no_grad():
            output = model(img)
        for columns, rows in zip(head_columns, per_head_rows):
            # Softmax over each row of this head.
            scores = F.softmax(output[:, columns], dim=1).cpu().numpy()
            rows.extend(scores.tolist())

    alllogit1, alllogit2, alllogit3 = (np.array(rows)
                                       for rows in per_head_rows)

    print("getmodeleval::alllogit1.shape", alllogit1.shape)
    print("getmodeleval::alllogit2.shape", alllogit2.shape)
    print("getmodeleval::alllogit3.shape", alllogit3.shape)
    return pathes, alllogit1, alllogit2, alllogit3

In [34]:
# Run the SE-ResNeXt model over the four test parquet shards and accumulate,
# per image, its id and the softmax scores of the three heads.
allpathes=[]
allpreds_root = []
allpreds_vowel = []
allpreds_consonant = []
tAllBegin = time.time()
for i in range(4):
    
    test_csv = pd.read_parquet(os.path.join(root_path, f'test_image_data_{i}.parquet'))
    # The per-shard timer starts after the parquet read, so the figure printed
    # below excludes file loading (the overall timer includes it).
    tBegin = time.time()
    # Process each shard as three row-wise pieces.
    # NOTE(review): presumably to bound peak memory - confirm.
    split_test = np.array_split(test_csv, 3, axis = 0)
    
    for test_df in split_test:
    
        # make_loader is defined outside this cell; here it receives the
        # DataFrame piece itself via `data_folder`.
        dataloaders = make_loader(data_folder = test_df,
                                               batch_size=8,
                                               num_workers = 1,
                                               is_shuffle = False)
        pathes, logit1, logit2, logit3 = getmodeleval(model, dataloaders)
        # Scores are stored as returned (no argmax here); the argmax is taken
        # later, after averaging with the other models' predictions.
        #preds_root = np.argmax(logit1, axis=1)
        #preds_vowel = np.argmax(logit2, axis=1)
        #preds_consonant = np.argmax(logit3, axis=1)

        allpathes.extend(pathes)
        allpreds_root.extend(logit1.tolist())
        allpreds_vowel.extend(logit2.tolist())
        allpreds_consonant.extend(logit3.tolist())
        
        # Release the per-piece arrays before the next piece is processed.
        del logit1, logit2, logit3
        gc.collect()
    # Release the shard before the next parquet file is read.
    del test_csv, split_test
    gc.collect()
        
    tEnd = time.time()
    # Shard index and elapsed inference time in milliseconds.
    print(i, int(round(tEnd * 1000)) - int(round(tBegin * 1000)), "ms")
tAllEnd = time.time()
# Sizes of the four accumulators (expected to match) and total elapsed milliseconds.
print(len(allpathes), len(allpreds_root), len(allpreds_vowel), len(allpreds_consonant),  int(round(tAllEnd * 1000)) - int(round(tAllBegin * 1000)), "ms")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  This is separate from the ipykernel package so we can avoid doing imports until


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)
0 1769 ms


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)
1 1819 ms


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)
2 1703 ms


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)


HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))


getmodeleval::alllogit1.shape (1, 168)
getmodeleval::alllogit2.shape (1, 11)
getmodeleval::alllogit3.shape (1, 7)
3 1804 ms
12 12 12 12 15997 ms


In [35]:

# Flatten the per-image predictions into one entry per (image, component),
# in the order grapheme root -> vowel diacritic -> consonant diacritic.
# `row_ids3` holds the matching submission row ids.
row_ids3 = []
se_net_preds = []
for idx, image_id in enumerate(allpathes):
    se_net_preds.append(allpreds_root[idx])
    se_net_preds.append(allpreds_vowel[idx])
    se_net_preds.append(allpreds_consonant[idx])

    row_ids3 += [
        str(image_id) + '_grapheme_root',
        str(image_id) + '_vowel_diacritic',
        str(image_id) + '_consonant_diacritic',
    ]


In [36]:
# Sanity check: entry 1 of both prediction lists must have the same shape,
# because the ensemble below adds them element-wise.
np.array(se_net_preds[1]).shape, np.array(our_preds[1]).shape

((11,), (11,))

# Ensemble Everything


In [37]:
# Weighted soft-voting ensemble. For every submission row, eight probability
# vectors are summed: `our_preds` once, the SE-ResNeXt prediction twice
# (i.e. weight 2) and the first five EfficientNet predictions.
# NOTE(review): this assumes `our_preds`, `se_net_preds` and `efnet_preds` are
# all ordered like `row_id`; `row_ids3` is available to verify that for the
# SE-ResNeXt predictions.
targets = []
num_models = 8  # number of vectors summed below (the old value 9 was wrong)
for i in range(len(row_id)):
    # `se_net_preds[i]` is a plain Python list; convert explicitly so `+` is
    # always an element-wise addition rather than list concatenation.
    se_net_probs = np.asarray(se_net_preds[i])
    summed_probs = (np.asarray(our_preds[i]) + se_net_probs + se_net_probs +
                    efnet_preds[i][0] + efnet_preds[i][1] + efnet_preds[i][2] +
                    efnet_preds[i][3] + efnet_preds[i][4])
    # Dividing by a positive constant does not change the argmax; it is kept
    # so the intermediate value is a true weighted average.
    sub_pred_value = np.argmax(summed_probs / num_models)
    targets.append(sub_pred_value)

In [38]:

# Assemble the submission table (one row per image/component) and write it
# to the file Kaggle scores.
submission_df = pd.DataFrame({'row_id': row_id, 'target': targets})
submission_df.to_csv('submission.csv', index=False)
# Only the last expression of a cell is rendered, so the preview goes last.
submission_df.head()