In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from tqdm import tqdm
#from efficientnet_pytorch import EfficientNet
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from sklearn.model_selection import StratifiedKFold

from apex import amp
# Not referenced by the visible cells: the loaders below are built with explicit batch sizes.
BATCH_SIZE = 512
# Only mentioned in the commented-out amp.initialize call inside create_models.
ENABLE_APEX = True

In [2]:
#DATA_DIR = '/kaggle/input/bengaliai-cv19'
#MODEL_DIR = '/kaggle/input/model3-weights'

# Folder containing train.csv and the *_image_data_<n>.parquet files read by the loaders.
# The BENGALI_DATA_DIR environment variable overrides the machine-specific default,
# so the notebook can run elsewhere without editing this cell.
DATA_DIR = os.environ.get('BENGALI_DATA_DIR', '/mnt/chicm/data/bengali')
#MODEL_DIR = './models'

In [3]:
#train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
#test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
#class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
#sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [4]:
from cvcore.data.auto_augment import RandAugment
def get_train_augs():
    """Create the augmentation callable applied to training images.

    Returns:
        A freshly constructed ``RandAugment(n=2, m=27)``.
        NOTE(review): presumably ``n`` is the number of ops per image and ``m``
        their magnitude -- confirm against cvcore's RandAugment.
    """
    rand_augment = RandAugment(n=2, m=27)
    return rand_augment

In [5]:
# Image geometry: BengaliDataset.get_img reshapes each flat pixel row to (HEIGHT, WIDTH).
HEIGHT = 137
WIDTH = 236

from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    """Map-style dataset that serves single-channel grapheme images as tensors.

    Args:
        df: DataFrame with one row per sample. It must have an ``image_id``
            column; unless ``test_mode`` is set it must also have
            ``grapheme_root``, ``vowel_diacritic``, ``consonant_diacritic``
            and ``word_label`` columns.
        img_df: DataFrame indexed by image id; each row is reshaped to
            ``(HEIGHT, WIDTH)`` to form the image.
        train_mode: augment the image with ``get_train_augs()`` and also
            return the un-augmented image.
        test_mode: return only the image tensor. This flag is checked before
            ``train_mode`` when choosing what to return.

    No mean/std normalisation is applied here.
    """

    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        """Return the sample at position ``idx``.

        Returns:
            * ``test_mode``: ``img``
            * ``train_mode``: ``(img, orig_img, labels)``
            * otherwise: ``(img, labels)``

            ``img`` / ``orig_img`` come from ``transforms.functional.to_tensor``
            applied to an ``(H, W, 1)`` uint8 array. ``labels`` is a tensor
            holding grapheme_root, vowel_diacritic, consonant_diacritic and
            word_label, in that order.
        """
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)

        orig_img = None
        if self.train_mode:
            # Snapshot the clean image before augmentation touches anything.
            orig_img = img.copy()
            augs = get_train_augs()
            # The augmentation is fed a PIL image. to_pil_image comes from the
            # already-imported torchvision, whereas the previously used
            # `Image.fromarray` relied on a name that is never imported.
            img = np.asarray(augs(transforms.functional.to_pil_image(img)))

        # Add a trailing channel axis so to_tensor receives an (H, W, 1) array.
        img = transforms.functional.to_tensor(np.expand_dims(img, axis=-1))

        if self.test_mode:
            return img

        labels = torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
        if self.train_mode:
            orig_img = transforms.functional.to_tensor(np.expand_dims(orig_img, axis=-1))
            return img, orig_img, labels
        return img, labels

    def get_img(self, img_id):
        """Look up ``img_id`` in ``img_df`` and return it as an inverted (255 - pixel) uint8 image."""
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    """Build the train and validation DataLoaders for one of five stratified folds.

    Args:
        batch_size: batch size of the (shuffled, drop_last) training loader.
        val_batch_size: batch size of the validation loader.
        ifold: which of the 5 folds (0-4) is held out for validation.
        dev_mode: load only ``train_image_data_0.parquet`` and keep at most
            1000 label rows, for quick experiments.

    Returns:
        ``(train_loader, val_loader)``. Both carry a ``num`` attribute (dataset
        length); ``val_loader`` additionally carries ``df``, the validation
        label frame.

    Raises:
        AssertionError: if ``ifold`` is not one of the available folds.
    """
    n_folds = 5
    # Validate up front so a bad fold index fails before the parquet files are read.
    assert ifold in range(n_folds), f'ifold must be one of 0..{n_folds - 1}, got {ifold!r}'

    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')

    train_df = shuffle(train_df, random_state=1234)

    # Give every distinct grapheme string an integer id (position in sorted unique order).
    grapheme_words = np.unique(train_df.grapheme.values)
    grapheme_words_dict = {grapheme: i for i, grapheme in enumerate(grapheme_words)}
    train_df['word_label'] = train_df['grapheme'].map(grapheme_words_dict)

    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        # Only one pixel shard is loaded, while train_df has been shuffled over
        # all images. Keep only rows whose pixels are actually present,
        # otherwise BengaliDataset.get_img fails with a KeyError.
        train_df = train_df[train_df['image_id'].isin(img_df.index)].iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)

    # Folds are stratified on grapheme_root only.
    kf = StratifiedKFold(n_folds, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    print(train.shape, val.shape)

    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)
    # predict() reads this frame to store the ground truth next to the logits.
    val_loader.df = val

    return train_loader, val_loader

In [6]:
def get_test_loader(batch_size=4, idx=0):
    """Build a DataLoader over one test parquet shard.

    Args:
        batch_size: number of images per batch.
        idx: shard number, i.e. ``test_image_data_{idx}.parquet`` in DATA_DIR.

    Returns:
        A non-shuffled DataLoader that yields image tensors only (no labels),
        with a ``num`` attribute holding the number of images.
    """
    img_df = pd.read_parquet(f'{DATA_DIR}/test_image_data_{idx}.parquet').set_index('image_id')

    # BengaliDataset needs a row frame with an `image_id` column as well as the
    # pixel table. The test shard has no label file, so derive the ids from the index.
    test_df = pd.DataFrame({'image_id': img_df.index})

    # test_mode=True -> items are bare image tensors; train_mode=False -> no augmentation.
    ds = BengaliDataset(test_df, img_df, train_mode=False, test_mode=True)
    loader = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=0, drop_last=False)
    loader.num = len(ds)
    return loader

# model

In [7]:
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d
# Single-channel mean / std applied to every input image inside the models'
# forward() via transforms.functional.normalize.
MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

In [8]:
class BengaliNet3(nn.Module):
    """Backbone plus one linear head producing 186 logits (168 + 11 + 7).

    Args:
        backbone_name: names starting with ``'efficient'`` are built through
            ``EfficientNet.from_name``; anything else is looked up in
            ``pretrainedmodels`` and created with ``pretrained=None``.

    NOTE(review): ``EfficientNet`` is not imported in the visible source (the
    import is commented out), so the ``'efficient'`` branch would raise
    NameError as the file stands.
    """

    def __init__(self, backbone_name):
        super().__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.backbone_name = backbone_name

        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant

        if self.backbone_name.startswith('efficient'):
            self.backbone = EfficientNet.from_name(self.backbone_name, override_params={'num_classes': 1000})
            head_in_features = self.backbone._fc.in_features
        else:
            self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained=None)
            head_in_features = self.backbone.last_linear.in_features
        # The head is sized from the backbone's own classifier input width.
        self.fc = nn.Linear(head_in_features, self.num_classes)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)

    def logits(self, x):
        """Global-average-pool the feature map, flatten it and apply the linear head."""
        pooled = self.avg_pool(x)
        return self.fc(pooled.view(pooled.size(0), -1))

    def forward(self, x):
        """Resize to 224x224, normalise, replicate to 3 channels and classify."""
        x = F.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)
        # normalize is applied sample by sample, in place on the resized batch.
        for sample_idx in range(x.size(0)):
            transforms.functional.normalize(x[sample_idx], mean=MEAN, std=STD, inplace=True)
        # Copy the single channel three times along the channel axis.
        x = torch.cat([x, x, x], 1)

        if self.backbone_name.startswith('efficient'):
            feature_map = self.backbone.extract_features(x)
        else:
            feature_map = self.backbone.features(x)
        return self.logits(feature_map)

In [9]:
class BengaliResNet(nn.Module):
    """``pretrainedmodels`` backbone with a main head and two auxiliary heads.

    Every head emits ``168 + 11 + 7 + 1295`` logits (grapheme root, vowel,
    consonant, word). The auxiliary heads branch off ``layer2`` and ``layer3``
    of the backbone. ``forward`` returns only the main logits.

    Attribute names are part of the checkpoint format (state-dict keys), so
    they must not be renamed.

    Args:
        backbone_name: key into ``pretrainedmodels.__dict__``. The code reads
            ``layerN[-1].se_module.fc2.out_channels``, so the backbone is
            expected to expose those attributes.
    """

    def __init__(self, backbone_name='se_resnext50_32x4d'):
        super().__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.n_word = 1295
        self.backbone_name = backbone_name

        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant + self.n_word

        self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained=None)
        self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)

        # Channel counts of the two tapped stages, read from their last SE module.
        self.num_p2_features = self.backbone.layer2[-1].se_module.fc2.out_channels
        self.num_p3_features = self.backbone.layer3[-1].se_module.fc2.out_channels
        p2_out_channels = self.num_p2_features * 4
        p3_out_channels = self.num_p3_features * 4

        # 1x1 conv -> BN -> Swish on each tapped feature map, widening it 4x.
        self.p2_head = nn.Conv2d(self.num_p2_features, p2_out_channels, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.p3_head = nn.Conv2d(self.num_p3_features, p3_out_channels, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn2 = nn.BatchNorm2d(p2_out_channels)
        self.bn3 = nn.BatchNorm2d(p3_out_channels)
        self.act2 = Swish()
        self.act3 = Swish()

        # fc_aux1 consumes the layer3 branch, fc_aux2 the layer2 branch.
        self.fc_aux1 = nn.Linear(p3_out_channels, self.num_classes)
        self.fc_aux2 = nn.Linear(p2_out_channels, self.num_classes)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)

        for head in (self.fc, self.fc_aux1, self.fc_aux2):
            nn.init.zeros_(head.bias.data)

        print('init model4')

    def features(self, x):
        """Run the backbone stages; return ``(layer4 output, p2 branch, p3 branch)``."""
        x = self.backbone.layer0(x)
        x = self.backbone.layer1(x)

        x = self.backbone.layer2(x)
        p2 = self.act2(self.bn2(self.p2_head(x)))

        x = self.backbone.layer3(x)
        p3 = self.act3(self.bn3(self.p3_head(x)))

        x = self.backbone.layer4(x)
        return x, p2, p3

    def logits(self, x, p2, p3):
        """Pool and flatten each map; return ``(main, aux-from-p3, aux-from-p2)`` logits."""
        x = torch.flatten(self.avg_pool(x), 1)
        p2 = torch.flatten(self.avg_pool(p2), 1)
        p3 = torch.flatten(self.avg_pool(p3), 1)
        return self.fc(x), self.fc_aux1(p3), self.fc_aux2(p2)

    def forward(self, x):
        """Resize to 224x224, normalise, replicate to 3 channels; return the main logits."""
        x = F.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)
        # normalize is applied sample by sample, in place on the resized batch.
        for sample_idx in range(x.size(0)):
            transforms.functional.normalize(x[sample_idx], mean=MEAN, std=STD, inplace=True)
        x = torch.cat([x, x, x], 1)

        x, p2, p3 = self.features(x)
        # The auxiliary logits are still computed but are not returned.
        main_logits, logits_aux1, logits_aux2 = self.logits(x, p2, p3)

        return main_logits #, logits_aux1, logits_aux2

In [10]:
'''
cfg = Namespace()
cfg.MODEL_NAME = 'tf_efficientnet_b4'
cfg.PRETRAINED = True
cfg.IN_CHANNELS = 1
cfg.POOL_TYPE = 'avg'
cfg.CLS_HEAD = 'linear'
cfg.MODEL_ACTIVATION = 'swish'
cfg.DROP_CONNECT = 0.2
cfg.DROPOUT= 0.
cfg.NUM_WORD_CLASSES = 1295
cfg.NUM_GRAPHEME_CLASSES = 168
cfg.NUM_VOWEL_CLASSES = 11
cfg.NUM_CONSONANT_CLASSES = 7
cfg.CKP_NAME = 'model4_eb4_fold1.pth'
'''
class BengaliEfficientNet(nn.Module):
    """timm EfficientNet split into stages, with a main and two auxiliary heads.

    The timm backbone is created with one input channel, no pretrained weights
    and ``drop_connect_rate=0.2``. Its stem, blocks and head are re-registered
    directly on this module. Every head emits ``168 + 11 + 7 + 1295`` logits;
    ``forward`` returns only the main logits.

    Attribute names are part of the checkpoint format (state-dict keys), so
    they must not be renamed.

    Args:
        backbone_name: model name handed to ``timm.create_model``.
        img_resize: if given, inputs are bilinearly resized to
            ``(img_resize, img_resize)``; if ``None`` they keep their size.
    """

    def __init__(self, backbone_name, img_resize=None):
        super().__init__()
        self.img_resize = img_resize
        self.drop_rate = 0.
        num_total_classes = 168 + 11 + 7 + 1295

        timm_net = timm.create_model(
            model_name=backbone_name,
            pretrained=False,
            in_chans=1,
            drop_connect_rate=0.2,
        )
        self.conv_stem = timm_net.conv_stem
        self.bn1 = timm_net.bn1
        self.act1 = timm_net.act1
        # Expose every backbone stage as block0, block1, ...
        for stage_idx, stage in enumerate(timm_net.blocks):
            setattr(self, f"block{stage_idx}", stage)
        self.conv_head = timm_net.conv_head
        self.bn2 = timm_net.bn2
        self.act2 = timm_net.act2

        # NOTE: this is the very same module object as block5, registered under a second name.
        self.aux_block5 = timm_net.blocks[5]
        self.aux_num_features = self.block5[-1].bn3.num_features
        aux_width = self.aux_num_features * 4

        # Two auxiliary branches: 1x1 conv -> BN -> Swish, widening 4x.
        self.aux_head4 = nn.Conv2d(self.aux_num_features, aux_width, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn4 = nn.BatchNorm2d(aux_width)
        self.act4 = Swish()
        self.aux_head5 = nn.Conv2d(self.aux_num_features, aux_width, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn5 = nn.BatchNorm2d(aux_width)
        self.act5 = Swish()

        self.global_pool = SelectAdaptivePool2d(pool_type='avg')
        self.num_features = timm_net.num_features * self.global_pool.feat_mult()

        # Only plain linear classification heads are implemented.
        self.fc = nn.Linear(self.num_features, num_total_classes)
        self.aux_fc1 = nn.Linear(aux_width, num_total_classes)
        self.aux_fc2 = nn.Linear(aux_width, num_total_classes)
        for head in (self.fc, self.aux_fc1, self.aux_fc2):
            nn.init.zeros_(head.bias.data)

    def _features(self, x):
        """Return ``(b4, b5, x)``: the two auxiliary feature maps and the main one."""
        x = self.act1(self.bn1(self.conv_stem(x)))
        for stage in (self.block0, self.block1, self.block2, self.block3, self.block4):
            x = stage(x)

        b4 = x                      # output of block4
        x = self.block5(x)
        b4 = self.aux_block5(b4)    # block4 output sent through aux_block5
        b5 = x                      # output of block5
        x = self.block6(x)

        x = self.act2(self.bn2(self.conv_head(x)))
        b4 = self.act4(self.bn4(self.aux_head4(b4)))
        b5 = self.act5(self.bn5(self.aux_head5(b5)))
        return b4, b5, x

    def forward(self, x):
        """Optionally resize, normalise a private copy of the input, return the main logits."""
        if self.img_resize is not None:
            x = F.interpolate(x, size=(self.img_resize, self.img_resize), mode='bilinear', align_corners=False)
        else:
            # normalize below works in place; clone so the caller's tensor is not modified.
            x = x.clone()

        for sample_idx in range(x.size(0)):
            transforms.functional.normalize(x[sample_idx], mean=MEAN, std=STD, inplace=True)

        b4, b5, x = self._features(x)
        x = torch.flatten(self.global_pool(x), 1)
        b4 = torch.flatten(self.global_pool(b4), 1)
        b5 = torch.flatten(self.global_pool(b5), 1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        logits = self.fc(x)

        # The auxiliary logits are still computed but are not returned.
        aux_logits1 = self.aux_fc1(b4)
        aux_logits2 = self.aux_fc2(b5)

        return logits #, aux_logits1, aux_logits2

In [11]:
def create_model(backbone, model_file, model_type, img_resize=None):
    """Instantiate one network and load its weights from a checkpoint file.

    Args:
        backbone: backbone name passed to the model class.
        model_file: path of the ``state_dict`` checkpoint to load.
        model_type: ``'BengaliNet3'``, ``'BengaliEfficientNet'`` or ``'BengaliResNet'``.
        img_resize: only forwarded to ``BengaliEfficientNet``.

    Returns:
        The model with the checkpoint loaded. The caller decides device and
        train/eval mode.

    Raises:
        ValueError: unknown ``model_type``.
        AssertionError: ``model_file`` does not exist.
    """
    print(backbone, model_file, model_type, img_resize)
    if model_type == 'BengaliNet3':
        model = BengaliNet3(backbone_name=backbone)
    elif model_type == 'BengaliEfficientNet':
        model = BengaliEfficientNet(backbone_name=backbone, img_resize=img_resize)
    elif model_type == 'BengaliResNet':
        model = BengaliResNet(backbone_name=backbone)
    else:
        raise ValueError('wrong model type')

    assert os.path.exists(model_file), 'checkpoint not found: {}'.format(model_file)
    print('loading {}...'.format(model_file))
    # Deserialize onto the CPU: without map_location, torch.load tries to put
    # the tensors back on the device they were saved from, which may not exist
    # here and needlessly occupies GPU memory. load_state_dict then copies the
    # values into the model's own parameters.
    state_dict = torch.load(model_file, map_location='cpu')
    model.load_state_dict(state_dict)

    return model

In [12]:
def create_models():
    """Load every checkpoint listed in the global ``ckp_list``.

    Each entry is unpacked as ``(backbone, model_file, model_type, img_resize)``.
    Each model is moved to CUDA, put in eval mode and wrapped in
    ``nn.DataParallel``.

    Returns:
        List of wrapped models, in ``ckp_list`` order.
    """
    def _load(entry):
        backbone, model_file, model_type, img_resize = entry
        net = create_model(backbone, model_file, model_type, img_resize).cuda()
        #if ENABLE_APEX:
        #    net = amp.initialize(net, None, opt_level="O1",verbosity=0)
        net.eval()
        return nn.DataParallel(net)

    return [_load(entry) for entry in ckp_list]

# predict

In [13]:
!ls ./model4-ckps/tf_efficientnet_b4

model4_eb4_fold1_256_512.pth	       model4_eb4_fold2_380_cv998333.pth
model4_eb4_fold1_256_512.pth_swa       model4_eb4_fold2_380_swa_cv998293.pth
model4_eb4_fold1_380.pth	       model4_eb4_fold2_380_swa_cv99858.pth
model4_eb4_fold1_380.pth_swa	       model4_eb4_fold2_380_swa_cv99863.pth
model4_eb4_fold1_380_swa_cv998361.pth  model4_eb4_fold2_380_swa_cv998675.pth
model4_eb4_fold1_380_swa_cv998658.pth  model4_eb4_fold2_380_swa_cv998693.pth
model4_eb4_fold1_380_swa_cv998744.pth  model4_eb4_fold2_380_swa_cv99877.pth
model4_eb4_fold1_cv9964.pth	       model4_eb4_fold2_380_swa_cv998835.pth
model4_eb4_fold1_cv9969.pth	       model4_eb4_fold2_380_swa_cv998894.pth
model4_eb4_fold1_cv9971.pth	       model4_eb4_fold2_380_swa_cv998946.pth
model4_eb4_fold1_cv9976.pth	       model4_eb4_fold2_cv9976.pth
model4_eb4_fold1_cv997705.pth	       model4_eb4_fold3_380_swa_cv998499.pth
model4_eb4_fold1_cv9977.pth	       model4_eb4_fold3_380_swa_cv998671.pth
model4_eb4_fold1_cv997864.pth	       m

In [14]:
!ls ./model4-ckps/se_resnext50_32x4d

model4_se_resnext50_fold0_224_cv9976.pth
model4_se_resnext50_fold0_224_cv9977.pth
model4_se_resnext50_fold0_224_cv9978.pth
model4_se_resnext50_fold0_224_cv998106.pth
model4_se_resnext50_fold0_224_swa_cv998273.pth
model4_se_resnext50_fold4_224_cv997979.pth
model4_se_resnext50_fold4_224_swa_cv998285.pth


In [15]:
# One entry per ensemble member, unpacked by create_models() as
# (backbone, model_file, model_type, img_resize).
# Going by the file names the entries are ordered fold0..fold4; the evaluation
# loop below relies on that when it pairs models[i] with fold i.
ckp_list = [
    ('se_resnext50_32x4d', './model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold0_224_swa_cv998273.pth', 'BengaliResNet', None), # lb
    ('tf_efficientnet_b4', './model4-ckps/tf_efficientnet_b4/model4_eb4_fold1_380_swa_cv998658.pth', 'BengaliEfficientNet', 380),  # lb9893
    ('tf_efficientnet_b4', './model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380_swa_cv998894.pth', 'BengaliEfficientNet', 380),  # lb
    ('tf_efficientnet_b4', './model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_cv998307_swa.pth', 'BengaliEfficientNet', None),  # lb9892
    ('se_resnext50_32x4d', './model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold4_224_cv997979.pth', 'BengaliResNet', None) # lb 9882
    ]
# One weight per ckp_list entry (they sum to 1.0); not referenced by the visible cells.
model_weights = [0.15, 0.2, 0.3, 0.2, 0.15]
#model_weights = [0.7, 0.3]
#model_weights = [1.]
#model_weights = [0.2, 0.15, 0.25, 0.25, 0.15]

In [16]:
import numpy as np
import sklearn.metrics
import torch


def calc_metrics(preds0, preds1, preds2, y):
    """Score grapheme / vowel / consonant predictions against the labels.

    Args:
        preds0, preds1, preds2: predicted class ids for grapheme root, vowel
            and consonant; compared with columns 0, 1 and 2 of ``y``.
        y: 2-D label array with one row per sample.

    Returns:
        Dict of values rounded to 6 decimals:
        ``recall`` (macro recalls averaged with weights 2:1:1),
        ``recall_<part>`` and ``acc_<part>`` for each of the three parts,
        and ``old_recall`` / ``old_recall_grapheme``, which call
        ``recall_score`` with predictions and labels swapped.
    """
    assert len(y) == len(preds0) == len(preds1) == len(preds2)

    part_names = ('grapheme', 'vowel', 'consonant')
    part_weights = [2, 1, 1]

    recalls, swapped_recalls, accuracies = [], [], []
    for column, part_preds in enumerate((preds0, preds1, preds2)):
        part_truth = y[:, column]
        recalls.append(sklearn.metrics.recall_score(part_truth, part_preds, average='macro'))
        # "old" variant: same call with the two arguments exchanged.
        swapped_recalls.append(sklearn.metrics.recall_score(part_preds, part_truth, average='macro'))
        accuracies.append((part_preds == part_truth).sum() / len(y))

    metrics = {}
    metrics['recall'] = round(np.average(recalls, weights=part_weights), 6)
    for part, value in zip(part_names, recalls):
        metrics[f'recall_{part}'] = round(value, 6)
    for part, value in zip(part_names, accuracies):
        metrics[f'acc_{part}'] = round(value, 6)

    metrics['old_recall'] = round(np.average(swapped_recalls, weights=part_weights), 6)
    metrics['old_recall_grapheme'] = round(swapped_recalls[0], 6)

    return metrics

In [17]:
def predict(model, val_loader, ifold, out_dir='./gary'):
    """Run ``model`` over a validation loader, print metrics and dump the logits.

    Args:
        model: network whose output has at least 186 columns; the first 186
            are split into 168 / 11 / 7 logits.
        val_loader: loader yielding ``(images, labels)`` batches and carrying a
            ``df`` attribute with the validation label frame.
        ifold: fold number, used in the log line and in the output file names.
        out_dir: directory for ``fold{ifold}_true_labels.csv`` and
            ``fold{ifold}_logits.npy``; created if missing.

    Returns:
        ``(preds0, preds1, preds2)``: numpy arrays of predicted class ids for
        grapheme root, vowel and consonant.
    """
    print('fold:', ifold)
    # Create the output folder before inference so a missing directory cannot
    # throw away a finished pass at the final save step.
    os.makedirs(out_dir, exist_ok=True)

    preds0, preds1,preds2 = [], [], []
    y_true, soft_labels = [], []

    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)

            x = x.cuda()
            outputs = model(x)
            # Keep only the grapheme/vowel/consonant logits; later columns are dropped.
            soft_labels.append(outputs[:, :186].cpu())

            outputs = torch.split(outputs[:, :186], [168, 11, 7], dim=1)

            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])

    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()

    y_true = torch.cat(y_true, 0).numpy()
    soft_labels = torch.cat(soft_labels, 0).numpy()
    val_df = val_loader.df

    metrics = calc_metrics(preds0, preds1, preds2, y_true)
    print(metrics)

    print(soft_labels.shape, val_df.shape)
    val_df.to_csv(f'{out_dir}/fold{ifold}_true_labels.csv', index=False)
    np.save(f'{out_dir}/fold{ifold}_logits.npy', soft_labels)

    return preds0, preds1, preds2

In [18]:
# Load every checkpoint in ckp_list (CUDA, eval mode, wrapped in DataParallel).
models = create_models()

se_resnext50_32x4d ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold0_224_swa_cv998273.pth BengaliResNet None
init model4
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold0_224_swa_cv998273.pth...
tf_efficientnet_b4 ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold1_380_swa_cv998658.pth BengaliEfficientNet 380
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold1_380_swa_cv998658.pth...
tf_efficientnet_b4 ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380_swa_cv998894.pth BengaliEfficientNet 380
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380_swa_cv998894.pth...
tf_efficientnet_b4 ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_cv998307_swa.pth BengaliEfficientNet None
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_cv998307_swa.pth...
se_resnext50_32x4d ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold4_224_cv997979.pth BengaliResNet None
init model4
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold4_224_c

In [34]:
# Build the loaders for fold 0; only the validation loader is kept.
_, val_loader0 = get_train_val_loaders(batch_size=256, val_batch_size=512, ifold=0)

(200840, 6)
(200840, 32332)
(160596, 6) (40244, 6)


In [36]:
# Peek at the validation label frame that get_train_val_loaders attached to the loader.
val_loader0.df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme,word_label
45330,Train_45330,62,7,0,ণ্ডে,364
177100,Train_177100,113,5,2,র্ভূ,964
194002,Train_194002,15,1,0,ক্টা,39
177170,Train_177170,72,7,4,দ্যে,487
41279,Train_41279,124,1,4,ল্যা,1043


In [38]:
# Evaluate the first ckp_list entry on the fold-0 validation split; also writes the labels and logits to disk.
predict(models[0], val_loader0, 0)

{'recall': 0.997514, 'recall_grapheme': 0.997074, 'recall_vowel': 0.998411, 'recall_consonant': 0.997497, 'acc_grapheme': 0.997391, 'acc_vowel': 0.998907, 'acc_consonant': 0.998782, 'old_recall': 0.998273, 'old_recall_grapheme': 0.99803}
(40244, 186) (40244, 6)


(array([ 62, 113,  15, ..., 113, 107,  69]),
 array([7, 5, 1, ..., 4, 0, 1]),
 array([0, 2, 0, ..., 2, 4, 0]))

In [25]:
# Evaluate folds 3 and 4. models[i] is paired with fold i, which relies on
# ckp_list being ordered fold0..fold4.
for i in range(3, 5):
    _, val_loader = get_train_val_loaders(batch_size=256, val_batch_size=512, ifold=i)
    predict(models[i], val_loader, i)

(200840, 6)
(200840, 32332)
(160716, 6) (40124, 6)
fold: 3
{'recall': 0.997782, 'recall_grapheme': 0.996724, 'recall_vowel': 0.998748, 'recall_consonant': 0.998934, 'acc_grapheme': 0.997657, 'acc_vowel': 0.998804, 'acc_consonant': 0.999302, 'old_recall': 0.998307, 'old_recall_grapheme': 0.99744}
(40124, 186) (40124, 6)
(200840, 6)
(200840, 32332)
(160735, 6) (40105, 6)
fold: 4
{'recall': 0.997096, 'recall_grapheme': 0.996461, 'recall_vowel': 0.998323, 'recall_consonant': 0.997139, 'acc_grapheme': 0.996883, 'acc_vowel': 0.998678, 'acc_consonant': 0.998703, 'old_recall': 0.997979, 'old_recall_grapheme': 0.997128}
(40105, 186) (40105, 6)


In [21]:
# Stand-alone EfficientNet-B4 built outside create_models(), then patched by apex amp at opt level O1.
model1 = BengaliEfficientNet(backbone_name='tf_efficientnet_b4', img_resize=380).cuda()
model1 = amp.initialize(model1, opt_level="O1")


Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


In [22]:
# Load the fold-1 checkpoint into the amp-initialized model.
model1.load_state_dict(torch.load('./model4-ckps/tf_efficientnet_b4/model4_eb4_fold1_380_swa_cv998658.pth'))

<All keys matched successfully>