In [1]:
#===========================================================
# prediction flags
#===========================================================

ID = 'image_id'
target_cols = ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']
ROOT = '../input/bengaliai-cv19/'
n_grapheme = 168
n_vowel = 11
n_consonant = 7
n_origin = 1295
n_total = n_grapheme + n_vowel + n_consonant + n_origin
N_JOBS = 4
VALIDATION = True
FOLD_NUMS = [0, 
             1, 
             2,
             3, 
             4
            ]
RESNET50_FOLDS = ['fold0_best_score.pth', # CV0.9846
                  'fold1_best_score.pth', # CV0.9860
                  'fold2_best_score.pth', # CV0.9853
                  'fold3_best_score.pth', # CV0.9870
                  'fold4_best_score.pth', # CV0.9873
                 ]
EFFICIENTNET_B2_FOLDS = ['fold0_best_score.pth', # CV0.9842
                         'fold1_best_score.pth', # CV0.9866
                         'fold2_best_score.pth', # CV0.9839
                         'fold3_best_score.pth', # CV0.9862
                         'fold4_best_score.pth', # CV0.9863
                        ]
NUM_TTA = 1

In [2]:
#===========================================================
# imports
#===========================================================

import sys
sys.path.append('../input/pytorch-pretrained-models/repository/pretrained-models.pytorch-master')

import gc
import os
import random
import time
from contextlib import contextmanager
from pathlib import Path

import cv2
from PIL import Image
import numpy as np
import pandas as pd
import scipy as sp
from fastprogress import master_bar, progress_bar
from functools import partial

import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader, Dataset

import pretrainedmodels

from albumentations import (
    Compose, HorizontalFlip, IAAAdditiveGaussianNoise, Normalize, OneOf,
    RandomBrightness, RandomContrast, Resize, VerticalFlip, Rotate, ShiftScaleRotate,
    RandomBrightnessContrast, OpticalDistortion, GridDistortion, ElasticTransform, Cutout
)
from albumentations.pytorch import ToTensorV2, ToTensor

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
# ====================================================
# Utils
# ====================================================

@contextmanager
def timer(name):
    t0 = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    LOGGER.info(f'[{name}] done in {time.time() - t0:.0f} s.')

    
def init_logger(log_file='train.log'):
    from logging import getLogger, DEBUG, FileHandler,  Formatter,  StreamHandler
    
    log_format = '%(asctime)s %(levelname)s %(message)s'
    
    stream_handler = StreamHandler()
    stream_handler.setLevel(DEBUG)
    stream_handler.setFormatter(Formatter(log_format))
    
    file_handler = FileHandler(log_file)
    file_handler.setFormatter(Formatter(log_format))
    
    logger = getLogger('Bengali')
    logger.setLevel(DEBUG)
    logger.addHandler(stream_handler)
    logger.addHandler(file_handler)
    
    return logger

LOG_FILE = 'bengali-train.log'
LOGGER = init_logger(LOG_FILE)


def seed_torch(seed=777):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

SEED = 777
seed_torch(SEED)

In [5]:
# =================================================================
# Dataset
# =================================================================

img_cols = list(map(str, list(np.arange(137*236))))
IMG_RESIZE = 128


class valid_GraphemeDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self,idx):
        file_path = f'../input/bengali-images/{self.df.image_id.values[idx]}.png'
        image = cv2.imread(file_path)
        image = Image.fromarray(np.uint8(image)).convert("L")
        image = np.asarray(image)
        image = cv2.resize(image, (IMG_RESIZE, IMG_RESIZE)).astype(np.float32)
        
        if self.transform:
            res = self.transform(image=image)
            #image = res['image'].astype(np.float32)
            image = res['image']
        else:
            #image = image.astype(np.float32)
            image = image
        
        image /= 255
        image = image[np.newaxis, :, :]
        image = 1 - image
        image = np.repeat(image, 3, 0)  # 1ch to 3ch
        
        return torch.tensor(image)
    

class Test_GraphemeDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self,idx):
        #image = self.df.iloc[idx][1:].values.reshape(137, 236).astype(np.float32)
        image = self.df[idx].reshape(137, 236).astype(np.float32)
        image = cv2.resize(image, (IMG_RESIZE, IMG_RESIZE)).astype(np.float32)
        #image = self.df.iloc[idx][1:].values.reshape(IMG_RESIZE, IMG_RESIZE).astype(np.float32)
        
        if self.transform:
            res = self.transform(image=image)
            #image = res['image'].astype(np.float32)
            image = res['image']
        else:
            #image = image.astype(np.float32)
            image = image
        
        image /= 255
        image = image[np.newaxis, :, :]
        image = 1 - image
        image = np.repeat(image, 3, 0)  # 1ch to 3ch
        
        return torch.tensor(image)

In [6]:
# =================================================================
# transforms
# =================================================================

def get_transforms():
    if NUM_TTA>=2:
        return Compose([
                        Rotate(limit=5, p=0.5),
                        ])
    else:
        return None

In [7]:
# =================================================================
# ResNet50
# =================================================================

from torch.nn.parameter import Parameter
import torch.nn.functional as F


def gem(x, p=3, eps=1e-6):
    return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)


class GeM(nn.Module):
    
    def __init__(self, p=3, eps=1e-6):
        super(GeM,self).__init__()
        self.p = Parameter(torch.ones(1)*p)
        self.eps = eps
        
    def forward(self, x):
        return gem(x, p=self.p, eps=self.eps)
    
    def __repr__(self):
        return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + ', ' + 'eps=' + str(self.eps) + ')'

    
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        #self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.avgpool1 = GeM()
        self.avgpool2 = GeM()
        self.avgpool3 = GeM()
        #self.dropout1 = nn.Dropout(0.1)
        #self.dropout2 = nn.Dropout(0.1)
        #self.dropout3 = nn.Dropout(0.1)
        #self.fc = nn.Linear(512 * block.expansion, num_classes)
        # grapheme_root
        self.fc1 = nn.Linear(512 * block.expansion, n_grapheme)
        # vowel_diacritic
        self.fc2 = nn.Linear(512 * block.expansion, n_vowel)
        # consonant_diacritic
        self.fc3 = nn.Linear(512 * block.expansion, n_consonant)
        # grapheme
        self.fc4 = nn.Linear(512 * block.expansion * 3, n_origin)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x):
        # See note [TorchScript super()]
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        #x = self.avgpool(x)
        #x = torch.flatten(x, 1)
        #x = self.fc(x)

        x1 = self.avgpool1(x)
        x2 = self.avgpool2(x)
        x3 = self.avgpool3(x)
        x1 = torch.flatten(x1, 1)
        x2 = torch.flatten(x2, 1)
        x3 = torch.flatten(x3, 1)
        #x1 = self.dropout1(x1)
        #x2 = self.dropout2(x2)
        #x3 = self.dropout3(x3)

        h_conc = torch.cat((x1, x2, x3), 1)
        x4 = self.fc4(h_conc)

        x1 = self.fc1(x1)
        x2 = self.fc2(x2)
        x3 = self.fc3(x3)

        return x1, x2, x3, x4

    def forward(self, x):
        return self._forward_impl(x)


def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model


def resnet50(pretrained=False, progress=True, **kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)

In [8]:
# =================================================================
# efficientnet-b2
# https://github.com/lukemelas/EfficientNet-PyTorch/tree/master/efficientnet_pytorch
# =================================================================
import re
import math
import collections
from functools import partial
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils import model_zoo

########################################################################
############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ###############
########################################################################


# Parameters for the entire model (stem, all blocks, and head)
GlobalParams = collections.namedtuple('GlobalParams', [
    'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate',
    'num_classes', 'width_coefficient', 'depth_coefficient',
    'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size'])

# Parameters for an individual model block
BlockArgs = collections.namedtuple('BlockArgs', [
    'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
    'expand_ratio', 'id_skip', 'stride', 'se_ratio'])

# Change namedtuple defaults
GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)


class SwishImplementation(torch.autograd.Function):
    @staticmethod
    def forward(ctx, i):
        result = i * torch.sigmoid(i)
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        i = ctx.saved_variables[0]
        sigmoid_i = torch.sigmoid(i)
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))


class MemoryEfficientSwish(nn.Module):
    def forward(self, x):
        return SwishImplementation.apply(x)

class Swish(nn.Module):
    def forward(self, x):
        return x * torch.sigmoid(x)


def round_filters(filters, global_params):
    """ Calculate and round number of filters based on depth multiplier. """
    multiplier = global_params.width_coefficient
    if not multiplier:
        return filters
    divisor = global_params.depth_divisor
    min_depth = global_params.min_depth
    filters *= multiplier
    min_depth = min_depth or divisor
    new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
    if new_filters < 0.9 * filters:  # prevent rounding by more than 10%
        new_filters += divisor
    return int(new_filters)


def round_repeats(repeats, global_params):
    """ Round number of filters based on depth multiplier. """
    multiplier = global_params.depth_coefficient
    if not multiplier:
        return repeats
    return int(math.ceil(multiplier * repeats))


def drop_connect(inputs, p, training):
    """ Drop connect. """
    if not training: return inputs
    batch_size = inputs.shape[0]
    keep_prob = 1 - p
    random_tensor = keep_prob
    random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
    binary_tensor = torch.floor(random_tensor)
    output = inputs / keep_prob * binary_tensor
    return output


def get_same_padding_conv2d(image_size=None):
    """ Chooses static padding if you have specified an image size, and dynamic padding otherwise.
        Static padding is necessary for ONNX exporting of models. """
    if image_size is None:
        return Conv2dDynamicSamePadding
    else:
        return partial(Conv2dStaticSamePadding, image_size=image_size)


class Conv2dDynamicSamePadding(nn.Conv2d):
    """ 2D Convolutions like TensorFlow, for a dynamic image size """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
        self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2

    def forward(self, x):
        ih, iw = x.size()[-2:]
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)


class Conv2dStaticSamePadding(nn.Conv2d):
    """ 2D Convolutions like TensorFlow, for a fixed image size"""

    def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs):
        super().__init__(in_channels, out_channels, kernel_size, **kwargs)
        self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2

        # Calculate padding based on image size and save it
        assert image_size is not None
        ih, iw = image_size if type(image_size) == list else [image_size, image_size]
        kh, kw = self.weight.size()[-2:]
        sh, sw = self.stride
        oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
        pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
        pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
        if pad_h > 0 or pad_w > 0:
            self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2))
        else:
            self.static_padding = Identity()

    def forward(self, x):
        x = self.static_padding(x)
        x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
        return x


class Identity(nn.Module):
    def __init__(self, ):
        super(Identity, self).__init__()

    def forward(self, input):
        return input


########################################################################
############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ##############
########################################################################


def efficientnet_params(model_name):
    """ Map EfficientNet model name to parameter coefficients. """
    params_dict = {
        # Coefficients:   width,depth,res,dropout
        'efficientnet-b0': (1.0, 1.0, 224, 0.2),
        'efficientnet-b1': (1.0, 1.1, 240, 0.2),
        'efficientnet-b2': (1.1, 1.2, 260, 0.3),
        'efficientnet-b3': (1.2, 1.4, 300, 0.3),
        'efficientnet-b4': (1.4, 1.8, 380, 0.4),
        'efficientnet-b5': (1.6, 2.2, 456, 0.4),
        'efficientnet-b6': (1.8, 2.6, 528, 0.5),
        'efficientnet-b7': (2.0, 3.1, 600, 0.5),
        'efficientnet-b8': (2.2, 3.6, 672, 0.5),
        'efficientnet-l2': (4.3, 5.3, 800, 0.5),
    }
    return params_dict[model_name]


class BlockDecoder(object):
    """ Block Decoder for readability, straight from the official TensorFlow repository """

    @staticmethod
    def _decode_block_string(block_string):
        """ Gets a block through a string notation of arguments. """
        assert isinstance(block_string, str)

        ops = block_string.split('_')
        options = {}
        for op in ops:
            splits = re.split(r'(\d.*)', op)
            if len(splits) >= 2:
                key, value = splits[:2]
                options[key] = value

        # Check stride
        assert (('s' in options and len(options['s']) == 1) or
                (len(options['s']) == 2 and options['s'][0] == options['s'][1]))

        return BlockArgs(
            kernel_size=int(options['k']),
            num_repeat=int(options['r']),
            input_filters=int(options['i']),
            output_filters=int(options['o']),
            expand_ratio=int(options['e']),
            id_skip=('noskip' not in block_string),
            se_ratio=float(options['se']) if 'se' in options else None,
            stride=[int(options['s'][0])])

    @staticmethod
    def _encode_block_string(block):
        """Encodes a block to a string."""
        args = [
            'r%d' % block.num_repeat,
            'k%d' % block.kernel_size,
            's%d%d' % (block.strides[0], block.strides[1]),
            'e%s' % block.expand_ratio,
            'i%d' % block.input_filters,
            'o%d' % block.output_filters
        ]
        if 0 < block.se_ratio <= 1:
            args.append('se%s' % block.se_ratio)
        if block.id_skip is False:
            args.append('noskip')
        return '_'.join(args)

    @staticmethod
    def decode(string_list):
        """
        Decodes a list of string notations to specify blocks inside the network.
        :param string_list: a list of strings, each string is a notation of block
        :return: a list of BlockArgs namedtuples of block args
        """
        assert isinstance(string_list, list)
        blocks_args = []
        for block_string in string_list:
            blocks_args.append(BlockDecoder._decode_block_string(block_string))
        return blocks_args

    @staticmethod
    def encode(blocks_args):
        """
        Encodes a list of BlockArgs to a list of strings.
        :param blocks_args: a list of BlockArgs namedtuples of block args
        :return: a list of strings, each string is a notation of block
        """
        block_strings = []
        for block in blocks_args:
            block_strings.append(BlockDecoder._encode_block_string(block))
        return block_strings


def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2,
                 drop_connect_rate=0.2, image_size=None, num_classes=1000):
    """ Creates a efficientnet model. """

    blocks_args = [
        'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25',
        'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25',
        'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25',
        'r1_k3_s11_e6_i192_o320_se0.25',
    ]
    blocks_args = BlockDecoder.decode(blocks_args)

    global_params = GlobalParams(
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        dropout_rate=dropout_rate,
        drop_connect_rate=drop_connect_rate,
        # data_format='channels_last',  # removed, this is always true in PyTorch
        num_classes=num_classes,
        width_coefficient=width_coefficient,
        depth_coefficient=depth_coefficient,
        depth_divisor=8,
        min_depth=None,
        image_size=image_size,
    )

    return blocks_args, global_params


def get_model_params(model_name, override_params):
    """ Get the block args and global params for a given model """
    if model_name.startswith('efficientnet'):
        w, d, s, p = efficientnet_params(model_name)
        # note: all models have drop connect rate = 0.2
        blocks_args, global_params = efficientnet(
            width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s)
    else:
        raise NotImplementedError('model name is not pre-defined: %s' % model_name)
    if override_params:
        # ValueError will be raised here if override_params has fields not included in global_params.
        global_params = global_params._replace(**override_params)
    return blocks_args, global_params


def load_pretrained_weights(model, model_name, load_fc=True, advprop=False):
    """ Loads pretrained weights, and downloads if loading for the first time. """
    # AutoAugment or Advprop (different preprocessing)
    efficientnet_pretrained_path = '../input/pytorch-pretrained-models/efficientnet-b2-8bb594d6.pth'
    state_dict = torch.load(efficientnet_pretrained_path)
    if load_fc:
        model.load_state_dict(state_dict, strict=False)
    else:
        state_dict.pop('_fc.weight')
        state_dict.pop('_fc.bias')
        res = model.load_state_dict(state_dict, strict=False)
        assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights'
    print('Loaded pretrained weights for {}'.format(model_name))


class MBConvBlock(nn.Module):
    """
    Mobile Inverted Residual Bottleneck Block
    Args:
        block_args (namedtuple): BlockArgs, see above
        global_params (namedtuple): GlobalParam, see above
    Attributes:
        has_se (bool): Whether the block contains a Squeeze and Excitation layer.
    """

    def __init__(self, block_args, global_params):
        super().__init__()
        self._block_args = block_args
        self._bn_mom = 1 - global_params.batch_norm_momentum
        self._bn_eps = global_params.batch_norm_epsilon
        self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
        self.id_skip = block_args.id_skip  # skip connection and drop connect

        # Get static or dynamic convolution depending on image size
        Conv2d = get_same_padding_conv2d(image_size=global_params.image_size)

        # Expansion phase
        inp = self._block_args.input_filters  # number of input channels
        oup = self._block_args.input_filters * self._block_args.expand_ratio  # number of output channels
        if self._block_args.expand_ratio != 1:
            self._expand_conv = Conv2d(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
            self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)

        # Depthwise convolution phase
        k = self._block_args.kernel_size
        s = self._block_args.stride
        self._depthwise_conv = Conv2d(
            in_channels=oup, out_channels=oup, groups=oup,  # groups makes it depthwise
            kernel_size=k, stride=s, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)

        # Squeeze and Excitation layer, if desired
        if self.has_se:
            num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio))
            self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
            self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)

        # Output phase
        final_oup = self._block_args.output_filters
        self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
        self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
        self._swish = MemoryEfficientSwish()

    def forward(self, inputs, drop_connect_rate=None):
        """
        :param inputs: input tensor
        :param drop_connect_rate: drop connect rate (float, between 0 and 1)
        :return: output of block
        """

        # Expansion and Depthwise Convolution
        x = inputs
        if self._block_args.expand_ratio != 1:
            x = self._swish(self._bn0(self._expand_conv(inputs)))
        x = self._swish(self._bn1(self._depthwise_conv(x)))

        # Squeeze and Excitation
        if self.has_se:
            x_squeezed = F.adaptive_avg_pool2d(x, 1)
            x_squeezed = self._se_expand(self._swish(self._se_reduce(x_squeezed)))
            x = torch.sigmoid(x_squeezed) * x

        x = self._bn2(self._project_conv(x))

        # Skip connection and drop connect
        input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters
        if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters:
            if drop_connect_rate:
                x = drop_connect(x, p=drop_connect_rate, training=self.training)
            x = x + inputs  # skip connection
        return x

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export)"""
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()


class EfficientNet(nn.Module):
    """
    An EfficientNet model. Most easily loaded with the .from_name or .from_pretrained methods
    Args:
        blocks_args (list): A list of BlockArgs to construct blocks
        global_params (namedtuple): A set of GlobalParams shared between blocks
    Example:
        model = EfficientNet.from_pretrained('efficientnet-b0')
    """

    def __init__(self, blocks_args=None, global_params=None):
        super().__init__()
        assert isinstance(blocks_args, list), 'blocks_args should be a list'
        assert len(blocks_args) > 0, 'block args must be greater than 0'
        self._global_params = global_params
        self._blocks_args = blocks_args

        # Get static or dynamic convolution depending on image size
        Conv2d = get_same_padding_conv2d(image_size=global_params.image_size)

        # Batch norm parameters
        bn_mom = 1 - self._global_params.batch_norm_momentum
        bn_eps = self._global_params.batch_norm_epsilon

        # Stem
        in_channels = 3  # rgb
        out_channels = round_filters(32, self._global_params)  # number of output channels
        self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        # Build blocks
        self._blocks = nn.ModuleList([])
        for block_args in self._blocks_args:

            # Update block input and output filters based on depth multiplier.
            block_args = block_args._replace(
                input_filters=round_filters(block_args.input_filters, self._global_params),
                output_filters=round_filters(block_args.output_filters, self._global_params),
                num_repeat=round_repeats(block_args.num_repeat, self._global_params)
            )

            # The first block needs to take care of stride and filter size increase.
            self._blocks.append(MBConvBlock(block_args, self._global_params))
            if block_args.num_repeat > 1:
                block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
            for _ in range(block_args.num_repeat - 1):
                self._blocks.append(MBConvBlock(block_args, self._global_params))

        # Head
        in_channels = block_args.output_filters  # output of final block
        out_channels = round_filters(1280, self._global_params)
        self._conv_head = Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self._bn1 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        # Final linear layer
        #self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        self._dropout = nn.Dropout(self._global_params.dropout_rate)
        #self._fc = nn.Linear(out_channels, self._global_params.num_classes)
        self._swish = MemoryEfficientSwish()

        self.avgpool1 = GeM()
        self.avgpool2 = GeM()
        self.avgpool3 = GeM()

        # grapheme_root
        self.fc1 = nn.Linear(out_channels, n_grapheme)
        # vowel_diacritic
        self.fc2 = nn.Linear(out_channels, n_vowel)
        # consonant_diacritic
        self.fc3 = nn.Linear(out_channels, n_consonant)
        # grapheme
        self.fc4 = nn.Linear(out_channels * 3, n_origin)

    def set_swish(self, memory_efficient=True):
        """Sets swish function as memory efficient (for training) or standard (for export)"""
        self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
        for block in self._blocks:
            block.set_swish(memory_efficient)


    def extract_features(self, inputs):
        """ Returns output of the final convolution layer """

        # Stem
        x = self._swish(self._bn0(self._conv_stem(inputs)))

        # Blocks
        for idx, block in enumerate(self._blocks):
            drop_connect_rate = self._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(self._blocks)
            x = block(x, drop_connect_rate=drop_connect_rate)

        # Head
        x = self._swish(self._bn1(self._conv_head(x)))

        return x

    def forward(self, inputs):
        """ Calls extract_features to extract features, applies final linear layer, and returns logits. """
        bs = inputs.size(0)
        # Convolution layers
        x = self.extract_features(inputs)

        # Pooling and final linear layer
        #x = self._avg_pooling(x)
        #x = x.view(bs, -1)
        x1 = self.avgpool1(x)
        x2 = self.avgpool2(x)
        x3 = self.avgpool3(x)
        x1 = x1.view(bs, -1)
        x2 = x2.view(bs, -1)
        x3 = x3.view(bs, -1)

        h_conc = torch.cat((x1, x2, x3), 1)
        x4 = self.fc4(h_conc)

        #x = self._dropout(x)
        #x = self._fc(x)
        x1 = self.fc1(x1)
        x2 = self.fc2(x2)
        x3 = self.fc3(x3)
        return x1, x2, x3, x4

    @classmethod
    def from_name(cls, model_name, override_params=None):
        cls._check_model_name_is_valid(model_name)
        blocks_args, global_params = get_model_params(model_name, override_params)
        return cls(blocks_args, global_params)

    @classmethod
    def from_pretrained(cls, model_name, advprop=False, num_classes=1000, in_channels=3):
        model = cls.from_name(model_name, override_params={'num_classes': num_classes})
        load_pretrained_weights(model, model_name, load_fc=(num_classes == 1000), advprop=advprop)
        if in_channels != 3:
            Conv2d = get_same_padding_conv2d(image_size = model._global_params.image_size)
            out_channels = round_filters(32, model._global_params)
            model._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        return model

    @classmethod
    def get_image_size(cls, model_name):
        cls._check_model_name_is_valid(model_name)
        _, _, res, _ = efficientnet_params(model_name)
        return res

    @classmethod
    def _check_model_name_is_valid(cls, model_name):
        """ Validates model name. """
        valid_models = ['efficientnet-b'+str(i) for i in range(9)]
        if model_name not in valid_models:
            raise ValueError('model_name should be one of: ' + ', '.join(valid_models))


class Efficientnetb2(nn.Module):
    def __init__(self):
        super().__init__()
        #self.net = EfficientNet.from_pretrained('efficientnet-b2')
        self.net = EfficientNet.from_name('efficientnet-b2')


    def forward(self, x):
        return self.net(x)

In [9]:
# =================================================================
# validation
# =================================================================

if VALIDATION:

    from tqdm import tqdm
    import sklearn.metrics

    folds = pd.read_csv('../input/bengali-resnet50-v3/folds.csv')

    predictions_ids = []
    resnet_predictions1 = []
    resnet_predictions2 = []
    resnet_predictions3 = []
    efficientnet_predictions1 = []
    efficientnet_predictions2 = []
    efficientnet_predictions3 = []
    batch_size = 256

    with timer('validation'):
        
        for num in FOLD_NUMS:

            val_idx = folds[folds['fold'] == num].index
            df = folds.loc[val_idx].reset_index(drop=True)
            predictions_ids.append(list(df[ID].values))
            test_dataset = valid_GraphemeDataset(df, 
                                                 transform=get_transforms(),
                                                 #transform=None,
                                                )
            test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=N_JOBS)
            
            # ResNet50
            FOLD = RESNET50_FOLDS[num]
            model = resnet50()
            model.load_state_dict(torch.load(f'../input/bengali-resnet50-v3/resnet50_{FOLD}'))
            model.to(device)

            for param in model.parameters():
                param.requires_grad = False
            model.eval()
            
            resnet_preds1 = []
            resnet_preds2 = []
            resnet_preds3 = []

            for i, images in tqdm(enumerate(test_loader)):

                images = images.to(device) 

                with torch.no_grad():
                    y_preds1, y_preds2, y_preds3, _ = model(images)

                resnet_preds1.append(list(y_preds1.to('cpu').numpy()))
                resnet_preds2.append(list(y_preds2.to('cpu').numpy()))
                resnet_preds3.append(list(y_preds3.to('cpu').numpy()))
            
            flatten_resnet_preds1 = sum(resnet_preds1, [])
            flatten_resnet_preds2 = sum(resnet_preds2, [])
            flatten_resnet_preds3 = sum(resnet_preds3, [])

            # efficinetnet-b2
            FOLD = EFFICIENTNET_B2_FOLDS[num]
            model = Efficientnetb2()
            model.load_state_dict(torch.load(f'../input/bengali-efficientnetb2/efficientnet-b2_{FOLD}'))
            model.to(device)

            for param in model.parameters():
                param.requires_grad = False
            model.eval()
            
            efficientnet_preds1 = []
            efficientnet_preds2 = []
            efficientnet_preds3 = []

            for i, images in tqdm(enumerate(test_loader)):

                images = images.to(device) 

                with torch.no_grad():
                    y_preds1, y_preds2, y_preds3, _ = model(images)

                efficientnet_preds1.append(list(y_preds1.to('cpu').numpy()))
                efficientnet_preds2.append(list(y_preds2.to('cpu').numpy()))
                efficientnet_preds3.append(list(y_preds3.to('cpu').numpy()))
            
            flatten_efficientnet_preds1 = sum(efficientnet_preds1, [])
            flatten_efficientnet_preds2 = sum(efficientnet_preds2, [])
            flatten_efficientnet_preds3 = sum(efficientnet_preds3, [])
            
            resnet_predictions1.append(flatten_resnet_preds1)
            resnet_predictions2.append(flatten_resnet_preds2)
            resnet_predictions3.append(flatten_resnet_preds3)
            efficientnet_predictions1.append(flatten_efficientnet_preds1)
            efficientnet_predictions2.append(flatten_efficientnet_preds2)
            efficientnet_predictions3.append(flatten_efficientnet_preds3)

2020-03-16 04:39:55,504 INFO [validation] start
157it [00:58,  2.67it/s]
157it [00:40,  3.89it/s]
157it [00:55,  2.82it/s]
157it [00:41,  3.75it/s]
157it [00:54,  2.86it/s]
157it [00:41,  3.79it/s]
157it [00:56,  2.80it/s]
157it [00:41,  3.81it/s]
157it [00:56,  2.78it/s]
157it [00:41,  3.76it/s]
2020-03-16 04:48:16,482 INFO [validation] done in 501 s.


In [10]:
flatten_resnet_preds1 = sum(resnet_predictions1, [])
flatten_resnet_preds2 = sum(resnet_predictions2, [])
flatten_resnet_preds3 = sum(resnet_predictions3, [])
flatten_efficientnet_preds1 = sum(efficientnet_predictions1, [])
flatten_efficientnet_preds2 = sum(efficientnet_predictions2, [])
flatten_efficientnet_preds3 = sum(efficientnet_predictions3, [])

# ResNet50

In [11]:
sum_predictions_ids = sum(predictions_ids, [])

val_df = pd.DataFrame()
val_df[ID] = sum_predictions_ids
val_df['pred_grapheme_root'] = np.array(flatten_resnet_preds1).argmax(1)
val_df['pred_vowel_diacritic'] = np.array(flatten_resnet_preds2).argmax(1)
val_df['pred_consonant_diacritic'] = np.array(flatten_resnet_preds3).argmax(1)
    
eval_df = df.merge(val_df, on=ID)
eval_df['pred_grapheme_root'] = eval_df['pred_grapheme_root'].astype(int)
eval_df['pred_vowel_diacritic'] = eval_df['pred_vowel_diacritic'].astype(int)
eval_df['pred_consonant_diacritic'] = eval_df['pred_consonant_diacritic'].astype(int)

scores = []
scores.append(sklearn.metrics.recall_score(eval_df['grapheme_root'].values, 
                                           eval_df['pred_grapheme_root'].values, average='macro'))
scores.append(sklearn.metrics.recall_score(eval_df['vowel_diacritic'].values, 
                                           eval_df['pred_vowel_diacritic'].values, average='macro'))
scores.append(sklearn.metrics.recall_score(eval_df['consonant_diacritic'].values, 
                                           eval_df['pred_consonant_diacritic'].values, average='macro'))
final_score = np.average(scores, weights=[2,1,1])
print(scores)
print(final_score)

[0.9835870486592777, 0.9925440485790218, 0.9893531351244654]
0.9872678202555106


# Efficientnet-b2

In [12]:
sum_predictions_ids = sum(predictions_ids, [])

val_df = pd.DataFrame()
val_df[ID] = sum_predictions_ids
val_df['pred_grapheme_root'] = np.array(flatten_efficientnet_preds1).argmax(1)
val_df['pred_vowel_diacritic'] = np.array(flatten_efficientnet_preds2).argmax(1)
val_df['pred_consonant_diacritic'] = np.array(flatten_efficientnet_preds3).argmax(1)
    
eval_df = df.merge(val_df, on=ID)
eval_df['pred_grapheme_root'] = eval_df['pred_grapheme_root'].astype(int)
eval_df['pred_vowel_diacritic'] = eval_df['pred_vowel_diacritic'].astype(int)
eval_df['pred_consonant_diacritic'] = eval_df['pred_consonant_diacritic'].astype(int)

scores = []
scores.append(sklearn.metrics.recall_score(eval_df['grapheme_root'].values, 
                                           eval_df['pred_grapheme_root'].values, average='macro'))
scores.append(sklearn.metrics.recall_score(eval_df['vowel_diacritic'].values, 
                                           eval_df['pred_vowel_diacritic'].values, average='macro'))
scores.append(sklearn.metrics.recall_score(eval_df['consonant_diacritic'].values, 
                                           eval_df['pred_consonant_diacritic'].values, average='macro'))
final_score = np.average(scores, weights=[2,1,1])
print(scores)
print(final_score)

[0.9824088557332425, 0.9913218347459218, 0.9885381231648432]
0.9861694173443126


# Ensemble

In [13]:
best_score1 = 0
best_weight = 0

for v in tqdm(np.arange(0.300, 0.701, 0.001)):
    
    ENSEMBLE_WEIGHTS = {'resnet': v, 'efficientnet': 1-v}

    ensemble_proba1 = ENSEMBLE_WEIGHTS['resnet']*np.array(flatten_resnet_preds1)\
                                    + ENSEMBLE_WEIGHTS['efficientnet']*np.array(flatten_efficientnet_preds1)
    
    sum_predictions_ids = sum(predictions_ids, [])

    val_df = pd.DataFrame()
    val_df[ID] = sum_predictions_ids
    val_df['pred_grapheme_root'] = ensemble_proba1.argmax(1)
    
    eval_df = df.merge(val_df, on=ID)
    eval_df['pred_grapheme_root'] = eval_df['pred_grapheme_root'].astype(int)
    
    score = sklearn.metrics.recall_score(eval_df['grapheme_root'].values, 
                                         eval_df['pred_grapheme_root'].values, average='macro')
    if score>best_score1:
        best_score1 = score
        best_weight = ENSEMBLE_WEIGHTS

print(best_weight)
print(best_score1)

100%|██████████| 401/401 [03:44<00:00,  1.79it/s]

{'resnet': 0.6320000000000003, 'efficientnet': 0.36799999999999966}
0.9863010657508401





In [14]:
best_score2 = 0
best_weight = 0

for v in tqdm(np.arange(0.300, 0.701, 0.001)):
    
    ENSEMBLE_WEIGHTS = {'resnet': v, 'efficientnet': 1-v}

    ensemble_proba2 = ENSEMBLE_WEIGHTS['resnet']*np.array(flatten_resnet_preds2)\
                                    + ENSEMBLE_WEIGHTS['efficientnet']*np.array(flatten_efficientnet_preds2)
    
    sum_predictions_ids = sum(predictions_ids, [])

    val_df = pd.DataFrame()
    val_df[ID] = sum_predictions_ids
    val_df['pred_vowel_diacritic'] = ensemble_proba2.argmax(1)
    
    eval_df = df.merge(val_df, on=ID)
    eval_df['pred_vowel_diacritic'] = eval_df['pred_vowel_diacritic'].astype(int)
    
    score = sklearn.metrics.recall_score(eval_df['vowel_diacritic'].values, 
                                         eval_df['pred_vowel_diacritic'].values, average='macro')
    if score>best_score2:
        best_score2 = score
        best_weight = ENSEMBLE_WEIGHTS
    
print(best_weight)
print(best_score2)

100%|██████████| 401/401 [02:32<00:00,  2.63it/s]

{'resnet': 0.6780000000000004, 'efficientnet': 0.3219999999999996}
0.9929003090148213





In [15]:
best_score3 = 0
best_weight = 0

for v in tqdm(np.arange(0.300, 0.701, 0.001)):
    
    ENSEMBLE_WEIGHTS = {'resnet': v, 'efficientnet': 1-v}

    ensemble_proba3 = ENSEMBLE_WEIGHTS['resnet']*np.array(flatten_resnet_preds3)\
                                    + ENSEMBLE_WEIGHTS['efficientnet']*np.array(flatten_efficientnet_preds3)
    
    sum_predictions_ids = sum(predictions_ids, [])

    val_df = pd.DataFrame()
    val_df[ID] = sum_predictions_ids
    val_df['pred_consonant_diacritic'] = ensemble_proba3.argmax(1)
    
    eval_df = df.merge(val_df, on=ID)
    eval_df['pred_consonant_diacritic'] = eval_df['pred_consonant_diacritic'].astype(int)
    
    score = sklearn.metrics.recall_score(eval_df['consonant_diacritic'].values, 
                                         eval_df['pred_consonant_diacritic'].values, average='macro')
    if score>best_score3:
        best_score3 = score
        best_weight = ENSEMBLE_WEIGHTS
    
print(best_weight)
print(best_score3)

100%|██████████| 401/401 [02:30<00:00,  2.66it/s]

{'resnet': 0.47700000000000015, 'efficientnet': 0.5229999999999999}
0.9900635700972875





In [16]:
scores = [best_score1, best_score2, best_score3]
final_score = np.average(scores, weights=[2,1,1])
print(scores)
print(final_score)

[0.9863010657508401, 0.9929003090148213, 0.9900635700972875]
0.9888915026534473
