In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found,
# so the attached datasets and checkpoints are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

/kaggle/input/model3/weighted_model.pt
/kaggle/input/bengaliai-cv19/train.csv
/kaggle/input/bengaliai-cv19/train_image_data_2.parquet
/kaggle/input/bengaliai-cv19/test_image_data_1.parquet
/kaggle/input/bengaliai-cv19/class_map.csv
/kaggle/input/bengaliai-cv19/train_image_data_3.parquet
/kaggle/input/bengaliai-cv19/test_image_data_2.parquet
/kaggle/input/bengaliai-cv19/train_image_data_1.parquet
/kaggle/input/bengaliai-cv19/test_image_data_0.parquet
/kaggle/input/bengaliai-cv19/test.csv
/kaggle/input/bengaliai-cv19/train_image_data_0.parquet
/kaggle/input/bengaliai-cv19/sample_submission.csv
/kaggle/input/bengaliai-cv19/test_image_data_3.parquet
/kaggle/input/densenet121/densenet121-a639ec97.pth


In [2]:
# Packages
import torch
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
from tqdm import tqdm

import gc
import os
import random
import sys
import albumentations as A
import pyarrow
import cv2
import itertools

import six
from torch.utils.data import Dataset


from torch import nn
from torch.nn.parameter import Parameter
import torch.nn.functional as F
from torch.nn import Sequential
from torchvision import models
import torchvision


In [3]:
# Data Processing Parameters
HEIGHT = 137  # image height in pixels; crop_resize uses it as the upper row bound
WIDTH = 236   # image width in pixels; crop_resize uses it as the upper column bound
SIZE = 128    # default output edge length of crop_resize

# Checkpoint location relative to '/kaggle/input/' (the prefix is added where
# the weights are loaded).
model_name = "model3/weighted_model.pt"

> ## Define our dataset classes

In [4]:
# wrapper
class DatasetMixin(Dataset):
    """Base dataset adding flexible indexing and an optional image transform.

    Subclasses implement :meth:`__len__` and :meth:`get_example`.  When a
    subclass sets a ``labels`` attribute that is not ``None``, each example is
    treated as an ``(image, label)`` pair; otherwise it is a bare image.

    Args:
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with an ``'image'`` entry.  ``None`` disables it.
    """

    def __init__(self, transform=None):
        self.transform = transform

    def __getitem__(self, index):
        """Returns an example or a sequence of examples.

        ``index`` may be an int, a tensor, a slice, a list or a numpy array.
        Slices, lists and arrays produce a list of examples.
        """
        if torch.is_tensor(index):
            index = index.tolist()
        if isinstance(index, slice):
            current, stop, step = index.indices(len(self))
            # Built-in range is enough here: the file already targets Python 3.
            return [self.get_example_wrapper(i)
                    for i in range(current, stop, step)]
        if isinstance(index, (list, np.ndarray)):
            return [self.get_example_wrapper(i) for i in index]
        return self.get_example_wrapper(index)

    def __len__(self):
        """Returns the number of data points."""
        raise NotImplementedError

    def get_example_wrapper(self, i):
        """Wrapper of `get_example`, to apply `transform` if necessary.

        Returns:
            The example unchanged when no transform is set; otherwise the
            transformed image, paired with its label when the dataset has
            labels.
        """
        example = self.get_example(i)

        # Without a transform the example must still be returned (falling off
        # the end of the function would hand ``None`` to the DataLoader).
        if self.transform is None:
            return example

        # ``labels`` is set by subclasses and may be a numpy array, whose truth
        # value is ambiguous, so compare against None explicitly.
        if getattr(self, 'labels', None) is not None:
            image, label = example[0], example[1]
            return self.transform(image=image)['image'], label

        # Unlabeled data: return the transformed image, not the raw input.
        return self.transform(image=example)['image']

    def get_example(self, i):
        """Returns the i-th example.

        Implementations should override it. It should raise :class:`IndexError`
        if the index is invalid.

        Args:
            i (int): The index of the example.

        Returns:
            The i-th example.

        """
        raise NotImplementedError


In [5]:

# actual class
class BengaliAIDataset(DatasetMixin):
    """Image dataset with optional labels, addressed through an index table.

    Args:
        images: Indexable collection of images; each item must support
            ``astype``.
        labels: Optional indexable collection of labels aligned with
            ``images``.  ``None`` puts the dataset in unlabeled mode.
        transform: Forwarded to :class:`DatasetMixin`.
        indices: Optional positions into ``images`` that make up this dataset;
            defaults to every position.
    """

    def __init__(self, images, labels=None, transform=None, indices=None):
        super().__init__(transform=transform)
        self.images = images
        self.labels = labels
        self.indices = np.arange(len(images)) if indices is None else indices
        self.train = labels is not None

    def __len__(self):
        """Number of examples exposed through ``indices``."""
        return len(self.indices)

    def get_example(self, i):
        """Fetch the i-th example as float32, with its label when available."""
        source_idx = self.indices[i]

        # float32 so that later affine-style transforms can operate on it
        pixels = self.images[source_idx].astype(np.float32)
        if not self.train:
            return pixels
        return pixels, self.labels[source_idx]

## Define our Data Processing Pipeline

In [6]:
###################################################################
# Load Images
###################################################################

# during submission, we load via parquet
def prepare_image(datadir, data_type, submission=False, indices=(0, 1, 2, 3)):
    """Load the image tables for the given file indices into one array.

    Args:
        datadir: Directory containing ``{data_type}_image_data_{i}`` files.
        data_type: ``'train'`` or ``'test'``.
        submission: When true the ``.parquet`` files are read, otherwise the
            ``.feather`` files.
        indices: Iterable of file numbers to load, in order.  The default is
            an immutable tuple so it cannot be altered between calls.

    Returns:
        numpy array of shape ``(n_images, 137, 236)`` with the files'
        images concatenated in the order given by ``indices``.

    Raises:
        AssertionError: If ``data_type`` is not ``'train'`` or ``'test'``.
    """
    assert data_type in ['train', 'test']
    if submission:
        image_df_list = [pd.read_parquet(datadir + f'/{data_type}_image_data_{i}.parquet')
                         for i in indices]
    else:
        image_df_list = [pd.read_feather(datadir + f'/{data_type}_image_data_{i}.feather')
                         for i in indices]

    print('image_df_list', len(image_df_list))
    HEIGHT = 137
    WIDTH = 236

    # The first column is skipped (presumably the image id - confirm); the
    # remaining values of each row are reshaped into one HEIGHT x WIDTH image.
    # No pixel inversion or scaling happens here.
    images = [df.iloc[:, 1:].values.reshape(-1, HEIGHT, WIDTH) for df in image_df_list]

    # Release the dataframes before concatenating to keep peak memory down.
    del image_df_list
    gc.collect()
    images = np.concatenate(images, axis=0)
    return images

# convert our graphemes to labels
def convertGrapheme(train):
    """Return `train` with every grapheme replaced by an integer id.

    Ids are handed out in order of first appearance in the ``grapheme``
    column.  The input frame itself is not modified.
    """
    grapheme_to_id = {
        grapheme: idx for idx, grapheme in enumerate(train.grapheme.unique())
    }
    return train.replace({"grapheme": grapheme_to_id})


In [7]:
###################################################################
# Crop and Resize our images
###################################################################
# bounding box
def bbox(img):
    """Return ``(rmin, rmax, cmin, cmax)`` of the non-zero region of `img`.

    All four bounds are inclusive indices.  Raises ``IndexError`` when `img`
    has no non-zero element.
    """
    def _span(flags):
        # positions of the truthy entries; first and last give the extent
        hits = np.where(flags)[0]
        return hits[0], hits[-1]

    row_flags = np.any(img, axis=1)
    col_flags = np.any(img, axis=0)
    rmin, rmax = _span(row_flags)
    cmin, cmax = _span(col_flags)
    return rmin, rmax, cmin, cmax

def crop_resize(img0, size=SIZE, pad=16):
    """Crop an image to its content, pad it to a square and resize it.

    ``img0`` is left unmodified: the cropped region is copied before
    low-intensity pixels are zeroed, so the caller's array is not written to.

    Args:
        img0: 2-D image array.
        size: Edge length of the square output passed to ``cv2.resize``.
        pad: Extra pixels added to the longer side of the crop before the
            image is padded to a square.

    Returns:
        The ``size`` x ``size`` image produced by ``cv2.resize``.
    """
    # Bounding box of pixels above the threshold, ignoring a 5-pixel border
    # because some images contain lines at the sides.
    # NOTE(review): the bounds are relative to the border-trimmed frame and
    # are not shifted back by 5 before indexing img0 - confirm this is intended.
    ymin, ymax, xmin, xmax = bbox(img0[5:-5, 5:-5] > 80)

    # Cropping may cut too much, so widen the box again (clamped to the image).
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT

    # Basic slicing returns a view; copy so the noise removal below does not
    # write into the caller's image.
    img = img0[ymin:ymax, xmin:xmax].copy()

    # Remove low intensity pixels as noise.
    img[img < 28] = 0
    lx, ly = xmax - xmin, ymax - ymin
    l = max(lx, ly) + pad

    # Pad to a square so the aspect ratio is kept when rescaling.
    img = np.pad(img, [((l - ly) // 2,), ((l - lx) // 2,)], mode='constant')
    return cv2.resize(img, (size, size))

def crop_char_image(image, threshold=40./255.):
    """Trim the rows and columns around the pixels that exceed `threshold`.

    Args:
        image: 2-D array.
        threshold: Pixels strictly greater than this count as content.

    Returns:
        The slice of `image` spanning the first to the last row and column
        that contain content.  If no pixel exceeds the threshold the whole
        image is returned.
    """
    assert image.ndim == 2
    above = image > threshold

    col_has_content = np.any(above, axis=0)
    row_has_content = np.any(above, axis=1)

    # argmax on a boolean vector gives the position of the first True
    # (or 0 when there is none); the reversed vector counts from the far end.
    rows_before = np.argmax(row_has_content)
    rows_after = np.argmax(row_has_content[::-1])
    cols_before = np.argmax(col_has_content)
    cols_after = np.argmax(col_has_content[::-1])

    n_rows, n_cols = image.shape
    return image[rows_before:n_rows - rows_after, cols_before:n_cols - cols_after]

def resize(image, size=(128, 128)):
    """Resize `image` to `size` using ``cv2.resize`` with its default settings."""
    return cv2.resize(image, dsize=size)

# run for all images 
def runCropRsz(images):
    """Crop and resize every image and stack the results into one array.

    Each image goes through ``crop_char_image`` (threshold ``40/255``) and
    then ``resize`` with its default target size.
    """
    standardized = [
        resize(crop_char_image(images[k], threshold=40./255.))
        for k in range(len(images))
    ]
    return np.array(standardized)


In [8]:

###################################################################
# Data Augmentations Pipeline
###################################################################
# define our augmentations
def augPipeline(P = .5):
    """Build the albumentations augmentation pipeline.

    Args:
        P: Probability with which the composed pipeline is applied.

    Returns:
        An ``A.Compose`` of four stages: additive Gaussian noise, one of two
        blurs, shift/rotate, and one of three distortions.
    """
    noise = A.IAAAdditiveGaussianNoise(p=.3)

    blur = A.OneOf([
        A.MedianBlur(blur_limit=3, p=0.4),
        A.Blur(blur_limit=1, p=0.4),
    ], p=0.5)

    # scale_limit=0.0: shift and rotate only, no rescaling
    shift_rotate = A.ShiftScaleRotate(shift_limit=.1, scale_limit=0.0, rotate_limit=15, p=.75)

    distortion = A.OneOf([
        A.OpticalDistortion(p=.4),
        A.GridDistortion(p=.2),
        A.IAAPiecewiseAffine(p=.5),
    ], p=.33)

    return A.Compose([noise, blur, shift_rotate, distortion], p=P)
    
    
# generates weights by class to pass into a sampler during training
def genWeightTensor(column, train):
    """Return one weight per row of `train`: the inverse frequency of its class.

    Args:
        column: Name of the class column in `train`.
        train: DataFrame holding that column.

    Returns:
        1-D tensor with one entry per row, equal to
        ``1 / (number of rows sharing that row's class)``.
    """
    inverse_frequency = 1 / train[column].value_counts()
    per_row_weights = [inverse_frequency[label] for label in train[column]]
    return torch.tensor(per_row_weights)


In [9]:
###################################################################
# Main Function to Generate and Load the dataset
###################################################################
def genDataset(indices, inputdir, data_type = "train", train = None):
    """Load, standardize and wrap the images of the selected data files.

    Args:
        indices: File numbers to load (forwarded to ``prepare_image``).
        inputdir: Directory holding the image files.
        data_type: ``'train'`` or ``'test'``.
        train: DataFrame with the three label columns; read only when
            ``data_type == 'train'``.

    Returns:
        ``(dataset, crop_rsz_img)`` for training data, or just ``dataset``
        for test data.
    """
    assert data_type in ['train', 'test']

    is_train = data_type == "train"

    # everything that is not training data is read in submission mode
    images = prepare_image(inputdir, data_type=data_type, submission=not is_train, indices=indices)
    print("~~Loaded Images~~")

    crop_rsz_img = runCropRsz(images)
    print("~~Standardized Images~~")

    # augmentation probability: half of the time for training, never otherwise
    augmentation = augPipeline(.5 if is_train else 0)

    if not is_train:
        dataset = BengaliAIDataset(crop_rsz_img, transform = augmentation)
        del images, crop_rsz_img
        gc.collect()
        return dataset

    train_labels = train[['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']].values
    # labels are truncated to the number of images that were actually loaded
    dataset = BengaliAIDataset(crop_rsz_img, labels = train_labels[:len(crop_rsz_img)], transform = augmentation)
    return dataset, crop_rsz_img


## Define Our Model Classes

In [10]:
# Skip (residual) connection helper: adds a block's input to its output.
# When the channel counts differ, only the overlapping leading channels are summed.
def residual_add(lhs, rhs):
    """Add `rhs` onto `lhs` along dim 1, tolerating different channel counts.

    The result always has as many channels as `lhs`:
    extra channels of `rhs` are dropped, extra channels of `lhs` pass through
    unchanged.
    """
    n_lhs, n_rhs = lhs.shape[1], rhs.shape[1]

    if n_lhs == n_rhs:
        return lhs + rhs

    if n_lhs < n_rhs:
        # rhs is wider: use only its leading channels
        return lhs + rhs[:, :n_lhs]

    # lhs is wider: sum the shared channels and keep the remainder as is
    summed = lhs[:, :n_rhs] + rhs
    passthrough = lhs[:, n_rhs:]
    return torch.cat([summed, passthrough], dim=1)


# block of linear functions - this is a single layer and can be changed 
# change this if we want to change our functions
class LinearBlock(nn.Module):
    """Linear layer with optional batch norm, activation, residual and dropout.

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        bias: Whether the linear layer has a bias term.
        use_bn: Apply ``BatchNorm1d`` after the linear layer.
        activation: Callable applied after batch norm, or ``None`` for none.
        dropout_ratio: Dropout probability; values ``<= 0`` disable dropout.
        residual: Add the block input to the output via ``residual_add``.
    """

    def __init__(self, in_features, out_features, bias=True,
                 use_bn=True, activation=F.relu, dropout_ratio=-1, residual=False,):
        super(LinearBlock, self).__init__()

        self.linear = nn.Linear(in_features, out_features, bias=bias)
        if use_bn:
            self.bn = nn.BatchNorm1d(out_features)
        if dropout_ratio > 0.:
            self.dropout = nn.Dropout(p=dropout_ratio)
        else:
            self.dropout = None
        self.activation = activation
        self.use_bn = use_bn
        self.dropout_ratio = dropout_ratio
        self.residual = residual

    # Implemented as ``forward`` rather than ``__call__`` so that calling the
    # block goes through ``nn.Module.__call__`` and registered hooks still run.
    # ``block(x)`` keeps working exactly as before.
    def forward(self, x):
        """Apply linear -> batch norm -> activation -> residual -> dropout."""
        h = self.linear(x)
        if self.use_bn:
            h = self.bn(h)
        if self.activation is not None:
            h = self.activation(h)
        if self.residual:
            h = residual_add(h, x)
        if self.dropout is not None:
            h = self.dropout(h)
        return h


In [11]:
import os

# Point TORCH_HOME at the attached densenet121 directory (presumably so that
# pretrained weights are looked up there instead of being downloaded).
# NOTE(review): the model below is built with pretrained=False, so this looks
# unused in this notebook - confirm.
os.environ['TORCH_HOME'] = '/kaggle/input/densenet121/'


# core underlying model 
class densenet(nn.Module):
    """DenseNet-121 feature extractor followed by three ``LinearBlock`` heads.

    Args:
        in_channels: Number of channels of the input images.  The first
            convolution maps them to the 3 channels DenseNet expects.
        out_dim: Size of the final output vector.
        use_bn: Whether the linear blocks use batch normalization.
    """

    def __init__(self,in_channels = 1,out_dim=10, use_bn=True):
        super(densenet, self).__init__()

        # Adapter convolution: in_channels -> 3 so the input matches the
        # 3-channel stem of the DenseNet backbone.  It honours `in_channels`
        # instead of assuming a single channel (the default is still 1).
        self.conv0 = nn.Sequential(nn.Conv2d(in_channels, 3, kernel_size=3, stride=1, padding=1, bias=True))

        # Backbone architecture only; weights come from the loaded state dict.
        self.base_model = models.densenet121(pretrained=False)

        inch = self.base_model.classifier.in_features

        # should move to train parameters
        activation = F.leaky_relu
        hdim = 512
        n_total_graphemes = 1285

        self.lin1 = LinearBlock(inch, hdim, use_bn=use_bn, activation=activation, residual=False)

        # predicts the whole grapheme:
        # the out dimension is the number of classes for whole grapheme prediction
        self.lin2 = LinearBlock(hdim, n_total_graphemes, use_bn=use_bn, activation=None, residual=False)

        # the input is the concatenation of the lin1 and lin2 outputs
        # input = hdim + n_total_graphemes; output = out_dim
        self.lin3 = LinearBlock(hdim + n_total_graphemes, out_dim, use_bn=use_bn, activation=None, residual=False)

    def forward(self, x):
        """Return the ``out_dim``-sized output of ``lin3`` for a batch of images."""
        h = self.conv0(x)
        h = self.base_model.features(h)

        # sum pooling over the two trailing axes of the feature map
        h = torch.sum(h, dim=(-1, -2))

        h1 = self.lin1(h)
        h_grapheme = self.lin2(h1)
        out = self.lin3(torch.cat((h1, h_grapheme), 1))

        return out
    



In [12]:
class BengaliClassifier(nn.Module):
    """Wraps a predictor and splits its output into three classification heads.

    Args:
        predictor: Callable/module producing either a 3-tuple of logits or a
            single tensor with ``n_grapheme + n_vowel + n_consonant`` columns.
        n_grapheme: Number of grapheme-root classes.
        n_vowel: Number of vowel-diacritic classes.
        n_consonant: Number of consonant-diacritic classes.
        data_type: ``'train'`` makes :meth:`forward` compute loss and metrics;
            any other value makes it return the split predictions only.
    """

    def __init__(self, predictor, n_grapheme=168, n_vowel=11, n_consonant=7,data_type='train'):
        super(BengaliClassifier, self).__init__()
        self.n_grapheme = n_grapheme
        self.n_vowel = n_vowel
        self.n_consonant = n_consonant
        self.n_total_class = self.n_grapheme + self.n_vowel + self.n_consonant
        self.predictor = predictor
        self.data_type = data_type

        # NOTE: 'weighted_recall' is listed here but forward() does not
        # currently produce it.
        self.metrics_keys = [
            'loss', 'loss_grapheme', 'loss_vowel', 'loss_consonant',
            'acc_grapheme', 'acc_vowel', 'acc_consonant', 'weighted_recall']

    @staticmethod
    def _accuracy(logits, target):
        """Fraction of rows whose arg-max class equals `target`, as a float."""
        return (logits.argmax(dim=1) == target).float().mean().item()

    def forward(self, x, y=None):
        """Run the predictor and, in train mode, compute loss and metrics.

        Args:
            x: Input batch passed to the predictor.
            y: Integer labels with one column per head (grapheme, vowel,
                consonant).  Required when ``data_type == 'train'``.

        Returns:
            ``(loss, metrics, pred)`` in train mode, where ``pred`` is the raw
            predictor output; otherwise the three split prediction tensors.
        """
        pred = self.predictor(x)

        if isinstance(pred, tuple):
            assert len(pred) == 3
            preds = pred
        else:
            assert pred.shape[1] == self.n_total_class
            preds = torch.split(pred, [self.n_grapheme, self.n_vowel, self.n_consonant], dim=1)

        if self.data_type != 'train':
            return preds

        # Individual cross-entropy losses, summed into a single training loss.
        loss_grapheme = F.cross_entropy(preds[0], y[:, 0])
        loss_vowel = F.cross_entropy(preds[1], y[:, 1])
        loss_consonant = F.cross_entropy(preds[2], y[:, 2])
        loss = loss_grapheme + loss_vowel + loss_consonant

        # Metric summary.  Accuracy is computed by the class's own helper; the
        # bare name `accuracy` used previously was not defined in this file.
        metrics = {
            'loss': loss.item(),
            'loss_grapheme': loss_grapheme.item(),
            'loss_vowel': loss_vowel.item(),
            'loss_consonant': loss_consonant.item(),
            'acc_grapheme': self._accuracy(preds[0], y[:, 0]),
            'acc_vowel': self._accuracy(preds[1], y[:, 1]),
            'acc_consonant': self._accuracy(preds[2], y[:, 2]),
        }

        return loss, metrics, pred

    def calc(self, data_loader):
        """Run the predictor over `data_loader` and return the split logits.

        Each batch is moved to the device of this module's parameters and fed
        to the predictor as is; gradients are disabled and the module is put
        in eval mode.
        """
        device: torch.device = next(self.parameters()).device
        self.eval()
        output_list = []
        with torch.no_grad():
            for batch in tqdm(data_loader):
                batch = batch.to(device)
                pred = self.predictor(batch)
                output_list.append(pred)
        output = torch.cat(output_list, dim=0)
        preds = torch.split(output, [self.n_grapheme, self.n_vowel, self.n_consonant], dim=1)
        return preds

    def predict_proba(self, data_loader):
        """Return per-head class probabilities (softmax over dim 1)."""
        preds = self.calc(data_loader)
        return [F.softmax(p, dim=1) for p in preds]

    def predict(self, data_loader):
        """Return per-head predicted class indices (arg-max over dim 1)."""
        preds = self.calc(data_loader)
        pred_labels = [torch.argmax(p, dim=1) for p in preds]
        return pred_labels


# Run Evaluation

In [13]:
# define our paths and parameters
# Directory holding the competition's image files (passed to genDataset below).
inputdir = "/kaggle/input/bengaliai-cv19"

# Parameters
# NOTE(review): data_type and batch_size are not read by the cells shown below;
# genDataset/BengaliClassifier receive the literal 'test' and the DataLoader uses `bs`.
data_type = 'test'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
batch_size=128

# Class counts of the three heads; their sum is the predictor's output width.
n_grapheme = 168
n_vowel = 11
n_consonant = 7
n_total = n_grapheme + n_vowel + n_consonant

# Batch size used by the inference DataLoader.
bs = 32


In [14]:
# instantiate model
predictor = densenet(in_channels=1, out_dim=n_total).to(device)

# map_location keeps the load working on a CPU-only session even if the
# checkpoint was saved from a GPU; `device` already falls back to CPU.
state_dict = torch.load('/kaggle/input/' + model_name, map_location=device)

# The checkpoint holds the weights of the whole classifier (wrapper plus
# predictor), so it is loaded into the wrapper rather than the predictor.
classifier = BengaliClassifier(predictor, data_type='test')
classifier.load_state_dict(state_dict)
classifier = classifier.to(device)
classifier

BengaliClassifier(
  (predictor): densenet(
    (conv0): Sequential(
      (0): Conv2d(1, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
    (base_model): DenseNet(
      (features): Sequential(
        (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu0): ReLU(inplace=True)
        (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (denseblock1): _DenseBlock(
          (denselayer1): _DenseLayer(
            (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (relu1): ReLU(inplace=True)
            (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (relu2): ReLU(inplace=True)
            (conv2): Conv2d(128,

In [15]:
#test = pd.read_csv(inputdir+'/test.csv')


In [16]:
# run 
# Put both modules in eval mode before predicting.
predictor.eval()
classifier.eval()

# Predicted class indices per head, accumulated across all test files in order.
grapheme_list = []
vowel_list = []
consonant_list = []

# One pass per test image file (files 0..3), so only one file is in memory at a time.
for i in range(4):
    indices = [i] # stream datasets to reduce memory
    # NOTE(review): `submission` is assigned but not read anywhere in this cell.
    submission = True
    dataset = genDataset(indices, inputdir, data_type = "test") # generates the dataset class

    # push to data loader; shuffle stays off so predictions keep the file order
    test_loader = DataLoader(dataset, batch_size=bs, shuffle = False)

    # drop the notebook-level name; the loader created above is still used below
    del dataset
    gc.collect()

    with torch.no_grad():
        for inputs in tqdm(test_loader):
            inputs = inputs.to(device)

            # unsqueeze(1) inserts a channel axis at position 1 before the model call
            grapheme,vowel,consonant = classifier(inputs.unsqueeze(1).float())

            # arg-max over dim 1 gives the predicted class index per image
            grapheme_list += list(grapheme.argmax(1).cpu().detach().numpy())
            vowel_list += list(vowel.argmax(1).cpu().detach().numpy())
            consonant_list += list(consonant.argmax(1).cpu().detach().numpy())

  0%|          | 0/1 [00:00<?, ?it/s]

image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00,  1.19it/s]


image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 44.30it/s]


image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 30.60it/s]


image_df_list 1
~~Loaded Images~~
~~Standardized Images~~


100%|██████████| 1/1 [00:00<00:00, 33.00it/s]


In [17]:
###submission
#submission = pd.read_csv(inputdir + '/sample_submission.csv')
#submission.target = np.hstack(np.asarray(predictions).T)
#submission
#submission.to_csv('submission.csv', index=False)

In [18]:
# Build the long-format submission: three rows per image, one per component.
# NOTE(review): row ids are derived from the running position `i`, which assumes
# the test images were read in Test_0, Test_1, ... order - confirm against the
# image ids in the test files.
row_id = []
target = []
for i in tqdm(range(len(grapheme_list))):
    row_id += [f'Test_{i}_grapheme_root', f'Test_{i}_vowel_diacritic',
               f'Test_{i}_consonant_diacritic']
    target += [grapheme_list[i], vowel_list[i], consonant_list[i]]
submission_df = pd.DataFrame({'row_id': row_id, 'target': target})
# Written to the working directory without the DataFrame index.
submission_df.to_csv('submission.csv', index=False)

100%|██████████| 12/12 [00:00<00:00, 27700.41it/s]
