- **Auto Augmentation** - https://towardsdatascience.com/how-to-improve-your-image-classifier-with-googles-autoaugment-77643f0be0c9
- **Synthesize Font Images** - https://www.kaggle.com/c/bengaliai-cv19/discussion/127938#775496
- **BengaliAI First Solution Writeup** - https://www.kaggle.com/c/bengaliai-cv19/discussion/135984
- **Bengali External Datasets** - https://www.kaggle.com/c/bengaliai-cv19/discussion/122396
- **Layer Normalization** - https://mlexplained.com/2018/01/13/weight-normalization-and-layer-normalization-explained-normalization-in-deep-learning-part-2/

# Install libraqm

In [None]:
# Build dependencies for libraqm (complex text layout support used by Pillow).
!apt-get install -y libfreetype6-dev libharfbuzz-dev libfribidi-dev gtk-doc-tools
# Fetch and unpack the raqm 0.7.0 source tarball (the tar step assumes the
# download landed in /kaggle/working, i.e. that this is the working directory).
!wget -O raqm-0.7.0.tar.gz https://raw.githubusercontent.com/python-pillow/pillow-depends/master/raqm-0.7.0.tar.gz
!tar -xzvf /kaggle/working/raqm-0.7.0.tar.gz
# Configure, build and install; each step only runs if the previous one succeeded.
!cd /kaggle/working/raqm-0.7.0/ && ./configure --prefix=/usr && make -j4 && make -j4 install

# Synthesize Font Images

In [None]:
import PIL.Image as Image, PIL.ImageDraw as ImageDraw, PIL.ImageFont as ImageFont

# Canvas size (pixels) of every synthesized grapheme image.
HEIGHT = 137
WIDTH = 236

def image_from_char(font_url, grapheme_char, grapheme_size):
    """Render one grapheme onto a fresh WIDTH x HEIGHT RGB canvas.

    Args:
        font_url: Path of the TrueType font file handed to ``ImageFont.truetype``.
        grapheme_char: Text (grapheme) to draw.
        grapheme_size: Font size handed to ``ImageFont.truetype``.

    Returns:
        The ``PIL.Image`` with the text centred horizontally and placed with
        one third of the free vertical space above it. No background or fill
        colour is passed, so Pillow's defaults apply.
    """
    image = Image.new('RGB', (WIDTH, HEIGHT))
    draw = ImageDraw.Draw(image)
    myfont = ImageFont.truetype(font_url, grapheme_size)

    # ImageDraw.textsize() was removed in Pillow 10. textbbox() anchored at the
    # origin reports the same right/bottom extents, so prefer it when present
    # and keep textsize() only as a fallback for old Pillow releases.
    if hasattr(draw, 'textbbox'):
        _, _, w, h = draw.textbbox((0, 0), grapheme_char, font=myfont)
    else:
        w, h = draw.textsize(grapheme_char, font=myfont)

    draw.text(((WIDTH - w) / 2, (HEIGHT - h) / 3), grapheme_char, font=myfont)

    return image

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the synthetic-font label table; the first CSV column becomes the index.
font = pd.read_csv("../input/bengaliai-cv19-font/font.csv", index_col=0)
# Preview the first rows (shown as the cell output).
font.head()

In [None]:
# Font sizes at which the sample grapheme is rendered in the preview cells.
grapheme_sizes = [84, 96, 108, 120]

# Sample grapheme for the previews: the 'grapheme' value of row 1397 of font.csv.
font_1397 = font.iloc[1397]['grapheme']
# Paths of the two TrueType fonts; only nikoshlightban_fonts is used by the
# preview cells shown in this notebook.
kalpurush_fonts = '/kaggle/input/kalpurush-fonts/kalpurush-2.ttf'
nikoshlightban_fonts = '/kaggle/input/nikoshlightbanfonts/NikoshLightBan.ttf'

In [None]:
# Preview the sample grapheme in NikoshLightBan at grapheme_sizes[0] (84).
font_1397_image = image_from_char(nikoshlightban_fonts, font_1397, grapheme_sizes[0])
font_1397_image

In [None]:
# Preview the sample grapheme in NikoshLightBan at grapheme_sizes[1] (96).
font_1397_image = image_from_char(nikoshlightban_fonts, font_1397, grapheme_sizes[1])
font_1397_image

In [None]:
# Preview the sample grapheme in NikoshLightBan at grapheme_sizes[2] (108).
font_1397_image = image_from_char(nikoshlightban_fonts, font_1397, grapheme_sizes[2])
font_1397_image

In [None]:
# Preview the sample grapheme in NikoshLightBan at grapheme_sizes[3] (120).
font_1397_image = image_from_char(nikoshlightban_fonts, font_1397, grapheme_sizes[3])
font_1397_image

In [None]:
# Convert the most recently rendered preview image to an int32 NumPy array.
data = np.asarray(font_1397_image, dtype="int32")
# Flattening to one row per image is left disabled here:
# data = data.flatten()

In [None]:
# Inspect the array shape; for the RGB canvas this should be (HEIGHT, WIDTH, 3).
data.shape

# Font Data (bengaliai-cv19-font)
- **Font Files** were generated using `.ttf` (TrueType font) files, which are publicly available fonts for Bengali. The `ttf` files used in this competition are `kalpurush-2.ttf` and `NikoshLightBan.ttf`.

- The **font.csv** file contains `(168*11*8) = 14784` unique graphemes, for a total of `14784 * 4 (four different grapheme sizes) = 59136` graphemes, and their respective labels.

- **font_image_data_0.parquet** --> 14784 graphemes (size 80)
- **font_image_data_1.parquet** --> 14784 graphemes (size 96)
- **font_image_data_2.parquet** --> 14784 graphemes (size 108)
- **font_image_data_3.parquet** --> 14784 graphemes (size 120)

# Script for the predicted relationship between graphemes and labels

In [None]:
import pandas as pd

# Competition class map: one row per (component_type, label) with its glyph.
class_map = pd.read_csv('../input/bengaliai-cv19/class_map.csv')

# One sub-frame per component type.
grapheme_root = class_map.loc[class_map['component_type'].eq('grapheme_root')]
vowel_diacritic = class_map.loc[class_map['component_type'].eq('vowel_diacritic')]
consonant_diacritic = class_map.loc[class_map['component_type'].eq('consonant_diacritic')]

# Glyph strings as plain lists, in the row order of the sub-frames above.
grapheme_root_list = grapheme_root['component'].to_list()
vowel_diacritic_list = vowel_diacritic['component'].to_list()
consonant_diacritic_list = consonant_diacritic['component'].to_list()

def label_to_grapheme(grapheme_root, vowel_diacritic, consonant_diacritic):
    """Compose the grapheme string for a (root, vowel, consonant) label triple.

    The three arguments index ``grapheme_root_list``, ``vowel_diacritic_list``
    and ``consonant_diacritic_list``. A label of 0 for the vowel or consonant
    diacritic means "no diacritic" and contributes nothing to the string.

    Returns:
        The concatenated grapheme string, or ``None`` when
        ``consonant_diacritic`` is not one of 0-7.
    """
    # Unknown consonant labels match no rule; nothing is looked up for them.
    if consonant_diacritic not in (0, 1, 2, 3, 4, 5, 6, 7):
        return None

    root = grapheme_root_list[grapheme_root]
    # Vowel label 0 adds no characters, so the vowel list is not consulted.
    vowel = vowel_diacritic_list[vowel_diacritic] if vowel_diacritic != 0 else ''

    if consonant_diacritic == 0:
        return root + vowel

    if consonant_diacritic == 7:
        # Built from entry 2: it goes in front of the root and, reversed,
        # directly behind it.
        lead = consonant_diacritic_list[2]
        return lead + root + lead[::-1] + vowel

    mark = consonant_diacritic_list[consonant_diacritic]

    if consonant_diacritic == 1:
        # The only case in which the vowel comes before the consonant mark.
        return root + vowel + mark

    if consonant_diacritic == 2:
        return mark + root + vowel

    if consonant_diacritic == 3:
        # The mark is split around the root: first two characters in front,
        # everything from the second character onwards behind.
        return mark[:2] + root + mark[1:] + vowel

    # Labels 4, 5 and 6: the mark follows the root. Root 123 with vowel 1 and
    # consonant 4 additionally gets a zero-width joiner before the mark.
    needs_joiner = (
        consonant_diacritic == 4 and grapheme_root == 123 and vowel_diacritic == 1
    )
    joiner = '\u200d' if needs_joiner else ''
    return root + joiner + mark + vowel

In [None]:
# Example: compose the grapheme for root 67, vowel diacritic 6, consonant diacritic 3.
label_to_grapheme(67, 6, 3)

# Important imports and installation

In [None]:
!pip install efficientnet-pytorch

In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import torchvision
from torchvision import transforms
import albumentations as A
from efficientnet_pytorch import EfficientNet
import gc
import cv2
from tqdm.notebook import tqdm
import sklearn.metrics
import json

In [None]:
# Per-channel statistics passed to transforms.Normalize; applied after ToTensor
# (values in [0, 1]) they map pixels to [-1, 1].
MEAN = [0.5, 0.5, 0.5]
STD = [0.5, 0.5, 0.5]
# Spatial size of the images fed to the network.
IMG_HEIGHT = 224
IMG_WIDTH = 224
# Samples per optimisation step.
BATCH_SIZE = 32
# Number of training epochs; also scales the sampler sizes and the LR schedule.
EPOCH = 40
# Passed as tqdm's `disable` flag; set to True to silence the progress bars.
TQDM_DISABLE = False

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs when
# CUDA is not available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load Dataset

In [None]:
def load_images(paths, height=137, width=236):
    """Load flattened grayscale images from parquet files into one array.

    Every file is read with ``pd.read_parquet``. Its first column is dropped
    and the remaining columns of each row are reshaped into a
    ``height`` x ``width`` image.

    Args:
        paths: Iterable of parquet file paths, concatenated in the given order.
        height: Image height in pixels (default 137).
        width: Image width in pixels (default 236).

    Returns:
        ``np.uint8`` array of shape ``(num_images, height, width)``.

    Raises:
        ValueError: If ``paths`` is empty (nothing to concatenate) or a file's
            row length is not ``height * width``.
    """
    all_images = []

    for path in paths:
        image_df = pd.read_parquet(path)
        images = image_df.iloc[:, 1:].values.reshape(-1, height, width).astype(np.uint8)

        # Free the DataFrame before the next file is read to limit peak memory.
        del image_df
        gc.collect()

        all_images.append(images)

    return np.concatenate(all_images)

In [None]:
# Label table for the synthetic font images; the first CSV column becomes the index.
font_data = pd.read_csv('../input/bengaliai-cv19-font/font.csv', index_col=0)

In [None]:
# Preview the first rows of the label table.
font_data.head()

In [None]:
%%time

# Load all four parquet shards into one uint8 image array, in shard order.
font_images = load_images([
    '../input/bengaliai-cv19-font/font_image_data_0.parquet',
    '../input/bengaliai-cv19-font/font_image_data_1.parquet',
    '../input/bengaliai-cv19-font/font_image_data_2.parquet',
    '../input/bengaliai-cv19-font/font_image_data_3.parquet',
])

# Create Dataset

In [None]:
class GraphemeDataset(torch.utils.data.Dataset):
    """Dataset pairing grapheme images with a single flattened class label.

    The three component labels are combined into one index:
    ``(root * num_vowel_diacritic + vowel) * num_consonant_diacritic + consonant``.

    Args:
        data: Table with ``grapheme_root``, ``vowel_diacritic`` and
            ``consonant_diacritic`` columns (each must support ``.tolist()``).
        images: Indexable collection of images whose elements support
            ``.copy()``, aligned row by row with ``data``.
        transform: Optional callable applied to a copy of each image. When
            ``None`` the copied image is returned unchanged.
        num_grapheme_root: Number of grapheme-root classes.
        num_vowel_diacritic: Number of vowel-diacritic classes.
        num_consonant_diacritic: Number of consonant-diacritic classes.
    """

    def __init__(self, data, images, transform=None,
                 num_grapheme_root=168,
                 num_vowel_diacritic=11,
                 num_consonant_diacritic=8):
        self.data = data
        self.images = images
        self.transform = transform
        self.num_grapheme_root = num_grapheme_root
        self.num_vowel_diacritic = num_vowel_diacritic
        self.num_consonant_diacritic = num_consonant_diacritic

        # Cache the label columns as int64 arrays for fast per-item lookup.
        self.grapheme_root_list = np.array(data['grapheme_root'].tolist(),
                                           dtype=np.int64)
        self.vowel_diacritic_list = np.array(data['vowel_diacritic'].tolist(),
                                             dtype=np.int64)
        self.consonant_diacritic_list = np.array(data['consonant_diacritic'].tolist(),
                                                 dtype=np.int64)

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``."""
        grapheme_root = self.grapheme_root_list[idx]
        vowel_diacritic = self.vowel_diacritic_list[idx]
        consonant_diacritic = self.consonant_diacritic_list[idx]

        label = (grapheme_root * self.num_vowel_diacritic + vowel_diacritic) \
            * self.num_consonant_diacritic + consonant_diacritic

        # Work on a copy so transforms can never modify the stored image.
        np_image = self.images[idx].copy()
        # transform defaults to None, so only call it when one was supplied.
        out_image = np_image if self.transform is None else self.transform(np_image)
        return out_image, label

# Image Augmentations

In [None]:
class Albumentations:
    """Callable adapter that runs a list of albumentations transforms on an image."""

    def __init__(self, augmentations):
        # Chain the individual transforms into a single composed pipeline.
        self.augmentations = A.Compose(augmentations)

    def __call__(self, image):
        """Apply the composed pipeline and return the resulting image."""
        result = self.augmentations(image=image)
        return result['image']
    

# Deterministic steps shared by training and validation: crop the centre
# 137 x IMG_WIDTH region, then resize to IMG_HEIGHT x IMG_WIDTH.
preprocess = [
    A.CenterCrop(height=137, width=IMG_WIDTH),
    A.Resize(height=IMG_HEIGHT, width=IMG_WIDTH, always_apply=True),
]

# Training-only augmentations, all configured with always_apply=True:
# pad to 256 x 256 with white, shear, shift/scale/rotate, crop back to
# IMG_HEIGHT x IMG_WIDTH, then grey out one rectangle of up to 112 x 112.
# NOTE(review): IAAAffine and Cutout may be missing from newer albumentations
# releases - confirm against the installed version.
augmentations = [
    A.PadIfNeeded(min_height=256, min_width=256, border_mode=cv2.BORDER_CONSTANT, 
                  value=[255, 255, 255], always_apply=True),
    A.imgaug.transforms.IAAAffine(shear=20, mode='constant', cval=255, always_apply=True),
    A.ShiftScaleRotate(rotate_limit=20, border_mode=cv2.BORDER_CONSTANT, 
                       value=[255, 255, 255], mask_value=[255, 255, 255], always_apply=True),
    A.RandomCrop(height=IMG_HEIGHT, width=IMG_WIDTH, always_apply=True),
    A.Cutout(num_holes=1, max_h_size=112, max_w_size=112, fill_value=128, always_apply=True),
]

# PreProcessing Transformer

- https://pytorch.org/docs/0.2.0/_modules/torchvision/transforms.html#ToTensor

**ToTensor()** --> Converts a `PIL.Image` or `numpy.ndarray (H x W x C)` in the range [0, 255] to 
a torch.FloatTensor of shape `(C x H x W)` in the range [0.0, 1.0].

In [None]:
def _gray_to_rgb(x):
    """Replicate a single-channel (H, W) image into an (H, W, 3) array."""
    return np.array([x, x, x]).transpose((1, 2, 0))


def _build_transform(albumentations_steps):
    """Build the image pipeline around the given albumentations steps."""
    return transforms.Compose([
        np.uint8,
        transforms.Lambda(_gray_to_rgb),
        np.uint8,
        Albumentations(albumentations_steps),
        transforms.ToTensor(),
        transforms.Normalize(mean=MEAN, std=STD),
    ])


# Training uses the deterministic preprocessing plus the random augmentations;
# validation uses the deterministic preprocessing only.
train_transform = _build_transform(preprocess + augmentations)

valid_transform = _build_transform(preprocess)

In [None]:
# Both datasets wrap the same font images and labels; they differ only in the
# transform (augmented for training, preprocessing-only for validation).
font_dataset = GraphemeDataset(font_data, font_images, train_transform)
valid_dataset = GraphemeDataset(font_data, font_images, valid_transform)

# Create Model

In [None]:
class BengaliModel(nn.Module):
    """Classifier head on top of a feature-extracting backbone.

    Pipeline: ``backbone.extract_features`` -> global average pooling ->
    LayerNorm -> Linear.

    Args:
        backbone: Module exposing ``extract_features(inputs)`` that returns a
            ``(batch, hidden_size, H, W)`` feature map.
        hidden_size: Channel count of the backbone feature map.
        class_num: Number of output classes. Defaults to ``168*11*8``, the
            size of the flattened (root, vowel, consonant) label space with
            8 consonant-diacritic classes.
    """

    def __init__(self, backbone, hidden_size=2560, class_num=168*11*8):
        super().__init__()
        self.backbone = backbone
        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        self.ln = nn.LayerNorm(hidden_size)
        self.fc = nn.Linear(hidden_size, class_num)

    def forward(self, inputs):
        """Return class logits of shape ``(batch, class_num)``."""
        bs = inputs.shape[0]
        feature = self.backbone.extract_features(inputs)
        # Pool every channel to 1x1, then flatten to (batch, hidden_size).
        feature_vector = self._avg_pooling(feature)
        feature_vector = feature_vector.view(bs, -1)
        feature_vector = self.ln(feature_vector)
        out = self.fc(feature_vector)

        return out

In [None]:
# EfficientNet-B0 backbone built by name.
# NOTE(review): presumably randomly initialised (from_name, not from_pretrained) - confirm.
backbone = EfficientNet.from_name('efficientnet-b0')
# hidden_size must equal the channel count of the backbone's feature map;
# class_num covers the flattened (root, vowel, consonant) label space.
classifier = BengaliModel(backbone, hidden_size=1280, class_num=168*11*8).to(device)

# Create Data Loader

In [None]:
# Each sampler draws, with replacement, enough samples for EPOCH passes over
# its dataset, so one long-lived iterator can feed the whole run.
num_train_samples = len(font_dataset) * EPOCH
num_valid_samples = len(valid_dataset) * EPOCH

font_sampler = torch.utils.data.RandomSampler(
    font_dataset,
    replacement=True,
    num_samples=num_train_samples,
)
valid_sampler = torch.utils.data.RandomSampler(
    valid_dataset,
    replacement=True,
    num_samples=num_valid_samples,
)

In [None]:
# Settings shared by both loaders. shuffle stays False because the ordering
# comes from the explicit samplers.
_loader_options = dict(
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=1,
    pin_memory=True,
    drop_last=True,
)

font_loader = torch.utils.data.DataLoader(
    font_dataset, sampler=font_sampler, **_loader_options
)

valid_loader = torch.utils.data.DataLoader(
    valid_dataset, sampler=valid_sampler, **_loader_options
)

In [None]:
# Long-lived iterators: the training loop pulls batches with next() across
# epochs instead of restarting the loaders each epoch.
font_loader_iter = iter(font_loader)
valid_loader_iter = iter(valid_loader)

# Training

In [None]:
def train_step(model, train_iter, criterion, optimizer, schedular, device):
    """Run one optimisation step on the next batch from ``train_iter``.

    Pulls a batch, moves it to ``device``, computes the loss, back-propagates,
    then advances the optimizer followed by the LR scheduler.

    Returns:
        The loss tensor for the batch (call ``.item()`` for a float).
    """
    batch_images, batch_labels = next(train_iter)
    batch_images = batch_images.to(device)
    batch_labels = batch_labels.to(device)

    # Clear stale gradients before the forward/backward pass.
    optimizer.zero_grad()
    batch_loss = criterion(model(batch_images), batch_labels)
    batch_loss.backward()

    # The schedule is per-step, so it advances right after every update.
    optimizer.step()
    schedular.step()

    return batch_loss

In [None]:
# AdamW with its default hyper-parameters over all classifier parameters.
optimizer = torch.optim.AdamW(classifier.parameters())
# Cross-entropy over the flattened grapheme class index.
classifier_loss = nn.CrossEntropyLoss()

In [None]:
# Both loaders hold EPOCH passes worth of batches, so dividing the batch count
# by EPOCH gives the batches per epoch. The validation count is taken from the
# loader (batches), not the dataset (samples), so it cannot outrun the
# iterator when BATCH_SIZE is larger than EPOCH.
num_steps_per_epoch = len(font_loader) // EPOCH
num_valid_steps_per_epoch = len(valid_loader) // EPOCH
train_steps = num_steps_per_epoch * EPOCH
# Step at which the learning rate stops being constant and starts to decay.
WARM_UP_STEP = train_steps * 0.5

def warm_up_linear_decay(step):
    """Return the LR multiplier for optimisation step ``step``.

    The multiplier is 1.0 for the first half of training (``step`` below
    ``WARM_UP_STEP``), then falls linearly to 0.0 at ``train_steps``. It is
    clamped at 0.0 so extra scheduler steps can never give a negative rate.
    """
    if step < WARM_UP_STEP:
        return 1.0
    return max(0.0, (train_steps - step) / (train_steps - WARM_UP_STEP))

schedular = torch.optim.lr_scheduler.LambdaLR(optimizer, warm_up_linear_decay)

In [None]:
# Per-epoch metric dicts, rewritten to log.json after every epoch.
log = []
# Best validation accuracy seen so far; decides when best.pth is overwritten.
best_score = 0.

for epoch in range(EPOCH):
    # ---- training phase ----
    classifier.train()
    losses = []
    for _ in tqdm(range(num_steps_per_epoch), disable=TQDM_DISABLE):
        step_loss = train_step(
            classifier,
            font_loader_iter,
            classifier_loss,
            optimizer,
            schedular,
            device,
        )
        losses.append(step_loss.item())

    metric = {
        'train/loss': sum(losses) / len(losses),
        'epoch': epoch,
    }
    print(f"============== Train loss on {epoch}: {metric['train/loss']} ==============")

    # ---- validation phase ----
    classifier.eval()
    preds, labels = [], []
    with torch.no_grad():
        for _ in tqdm(range(num_valid_steps_per_epoch), disable=TQDM_DISABLE):
            image, label = next(valid_loader_iter)
            out = classifier(image.to(device))
            preds.append(out.argmax(dim=1).cpu().numpy())
            labels.append(label.numpy())

    preds = np.concatenate(preds)
    labels = np.concatenate(labels)
    accuracy = sklearn.metrics.accuracy_score(y_pred=preds, y_true=labels)
    metric['valid/accuracy'] = accuracy

    print(f"============== Valid accuracy on {epoch}: {metric['valid/accuracy']} ==============")

    log.append(metric)

    # ---- checkpointing ----
    # best.pth tracks the best validation accuracy; model.pth is always the latest.
    if accuracy > best_score:
        best_score = accuracy
        torch.save(classifier.state_dict(), 'best.pth')
    torch.save(classifier.state_dict(), 'model.pth')

    with open('log.json', 'w') as f_out:
        json.dump(log, f_out, indent=4)