In [1]:
!pip show timm

Name: timm
Version: 0.1.16
Summary: (Unofficial) PyTorch Image Models
Home-page: https://github.com/rwightman/pytorch-image-models
Author: Ross Wightman
Author-email: hello@rwightman.com
License: UNKNOWN
Location: /mnt/chicm/anaconda3/lib/python3.7/site-packages
Requires: torchvision, torch
Required-by: cvcore


In [2]:
ENABLE_APEX = False

BATCH_SIZE = 1024

if ENABLE_APEX:
    !cd /kaggle/input/nvidia-apex &&  pip install --no-cache-dir --global-option="--cpp_ext" \
        --global-option="--cuda_ext" .
    from apex import amp
    BATCH_SIZE = 512

In [3]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from tqdm import tqdm
#from efficientnet_pytorch import EfficientNet
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


In [4]:
#DATA_DIR = '/kaggle/input/bengaliai-cv19'
#MODEL_DIR = '/kaggle/input/model3-weights'

DATA_DIR = '/mnt/chicm/data/bengali'
#MODEL_DIR = './models'

In [5]:
#train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
#test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
#class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
#sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
HEIGHT = 137
WIDTH = 236

class BengaliDataset(Dataset):
    def __init__(self, img_df):
        self.img_df = img_df

    def __getitem__(self, idx):
        img = 255 - self.img_df.iloc[idx].values.reshape(HEIGHT, WIDTH).astype(np.uint8)
        img = np.expand_dims(img, axis=-1)
        img = transforms.functional.to_tensor(img)
        #img = transforms.functional.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        
        return img

    def __len__(self):
        return len(self.img_df)


In [7]:
def get_test_loader(batch_size=4, idx=0):
    img_df = pd.read_parquet(f'{DATA_DIR}/test_image_data_{idx}.parquet').set_index('image_id')

    ds = BengaliDataset(img_df)
    loader = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=0, drop_last=False)
    loader.num = len(ds)
    return loader

# model

In [8]:
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d
MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

In [9]:
class BengaliNet3(nn.Module):
    def __init__(self, backbone_name):
        super(BengaliNet3, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant
        
        #self.conv0 = nn.Conv2d(1, 3, kernel_size=1, stride=1, padding=0)
        
        if self.backbone_name.startswith('efficient'):
            self.backbone = EfficientNet.from_name(self.backbone_name, override_params={'num_classes': 1000})
            self.fc = nn.Linear(self.backbone._fc.in_features, self.num_classes)
        else:
            self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained=None)
            self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)
        
        #self.fix_input_layer()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
    def logits(self, x):
        x = self.avg_pool(x)
        #x = F.dropout2d(x, 0.2, self.training)
        x = x.view(x.size(0), -1)
        #print(x.size())
        return self.fc(x)
    
    def forward(self, x):
        x = F.interpolate(x, size=(224,224), mode='bilinear', align_corners=False)
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)
        x = torch.cat([x,x,x], 1)
        #x = self.conv0(x)
        #print(x.size())
        if self.backbone_name.startswith('efficient'):
            x = self.backbone.extract_features(x)
        else:
            x = self.backbone.features(x)
        x = self.logits(x)

        return x

In [10]:
class BengaliResNet(nn.Module):
    def __init__(self, backbone_name='se_resnext50_32x4d'):
        super(BengaliResNet, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.n_word = 1295
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant + self.n_word
        
        self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained=None)
        self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)
        
        self.num_p2_features = self.backbone.layer2[-1].se_module.fc2.out_channels
        self.num_p3_features = self.backbone.layer3[-1].se_module.fc2.out_channels
        self.p2_head = nn.Conv2d(self.num_p2_features, self.num_p2_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.p3_head = nn.Conv2d(self.num_p3_features, self.num_p3_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn2 = nn.BatchNorm2d(self.num_p2_features * 4)
        self.bn3 = nn.BatchNorm2d(self.num_p3_features * 4)
        self.act2 = Swish()
        self.act3 = Swish()
        
        self.fc_aux1 = nn.Linear(self.num_p3_features * 4, self.num_classes)
        self.fc_aux2 = nn.Linear(self.num_p2_features * 4, self.num_classes)
        
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
        for fc in [self.fc, self.fc_aux1, self.fc_aux2]:
            nn.init.zeros_(fc.bias.data)

        print('init model4')
        
    def features(self, x):
        x = self.backbone.layer0(x); #print(x.size())
        x = self.backbone.layer1(x); #print(x.size())
        x = self.backbone.layer2(x); p2 = x; p2 = self.p2_head(p2); p2 = self.bn2(p2); p2 = self.act2(p2) #print(x.size())
        x = self.backbone.layer3(x); p3 = x; p3 = self.p3_head(p3); p3 = self.bn3(p3); p3 = self.act3(p3) #print(x.size())
        x = self.backbone.layer4(x); #print(x.size())
        return x, p2, p3
        
    def logits(self, x, p2, p3):
        x = self.avg_pool(x)
        #x = F.dropout2d(x, 0.2, self.training)
        x = torch.flatten(x, 1)
        
        p2 = self.avg_pool(p2)
        p2 = torch.flatten(p2, 1)
        
        p3 = self.avg_pool(p3)
        p3 = torch.flatten(p3, 1)
        return self.fc(x), self.fc_aux1(p3), self.fc_aux2(p2)
    
    def forward(self, x):
        x = F.interpolate(x, size=(224,224), mode='bilinear', align_corners=False)
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)
        x = torch.cat([x,x,x], 1)
        #x = self.conv0(x)
        #print(x.size())
        x, p2, p3 = self.features(x)
        x, logits_aux1, logits_aux2 = self.logits(x, p2, p3)

        return x #, logits_aux1, logits_aux2

In [107]:
'''
cfg = Namespace()
cfg.MODEL_NAME = 'tf_efficientnet_b4'
cfg.PRETRAINED = True
cfg.IN_CHANNELS = 1
cfg.POOL_TYPE = 'avg'
cfg.CLS_HEAD = 'linear'
cfg.MODEL_ACTIVATION = 'swish'
cfg.DROP_CONNECT = 0.2
cfg.DROPOUT= 0.
cfg.NUM_WORD_CLASSES = 1295
cfg.NUM_GRAPHEME_CLASSES = 168
cfg.NUM_VOWEL_CLASSES = 11
cfg.NUM_CONSONANT_CLASSES = 7
cfg.CKP_NAME = 'model4_eb4_fold1.pth'
'''
class BengaliEfficientNet(nn.Module):
    """
    EfficientNet B0-B8.
    Args:
        cfg (CfgNode): configs
    """
    def __init__(self, backbone_name):
        super(BengaliEfficientNet, self).__init__()
        #model_name = cfg.MODEL_NAME
        pretrained = False
        input_channels = 1
        pool_type = 'avg'
        drop_connect_rate = 0.2
        self.drop_rate = 0.
        cls_head = 'linear'
        num_total_classes = 168+11+7+1295

        backbone = timm.create_model(
            model_name=backbone_name,
            pretrained=pretrained,
            in_chans=input_channels,
            drop_connect_rate=drop_connect_rate,
        )
        self.conv_stem = backbone.conv_stem
        self.bn1 = backbone.bn1
        self.act1 = backbone.act1
        ### Original blocks ###
        for i in range(len((backbone.blocks))):
            setattr(self, "block{}".format(str(i)), backbone.blocks[i])
        self.conv_head = backbone.conv_head
        self.bn2 = backbone.bn2
        self.act2 = backbone.act2
        self.aux_block5 = backbone.blocks[5]
        self.aux_num_features = self.block5[-1].bn3.num_features
        self.aux_head4 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn4 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act4 = Swish()
        self.aux_head5 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn5 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act5 = Swish()
        self.global_pool = SelectAdaptivePool2d(pool_type=pool_type)
        self.num_features = backbone.num_features * self.global_pool.feat_mult()
        assert cls_head == 'linear'
        if cls_head == "linear":
            ### Baseline head ###
            self.fc = nn.Linear(self.num_features, num_total_classes)            
            self.aux_fc1 = nn.Linear(self.aux_num_features*4, num_total_classes)
            self.aux_fc2 = nn.Linear(self.aux_num_features*4, num_total_classes)
            
            for fc in [self.fc, self.aux_fc1, self.aux_fc2]:
                nn.init.zeros_(fc.bias.data)
        #elif cls_head == "norm_softmax":
            ### NormSoftmax ###
            #self.grapheme_fc = NormSoftmax(self.num_features, num_grapheme_classes)
            #self.consonant_fc = NormSoftmax(self.num_features, num_consonant_classes)
            #self.vowel_fc = NormSoftmax(self.num_features, num_vowel_classes)
        # Replace with Mish activation
        #if cfg.MODEL_ACTIVATION == "mish":
        #    convert_swish_to_mish(self)
        #del backbone

    def _features(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.block0(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x); b4 = x
        x = self.block5(x); b4 = self.aux_block5(b4); b5 = x
        x = self.block6(x)
        x = self.conv_head(x); b4 = self.aux_head4(b4); b5 = self.aux_head5(b5)
        x = self.bn2(x); b4 = self.bn4(b4); b5 = self.bn5(b5)
        x = self.act2(x); b4 = self.act4(b4); b5 = self.act5(b5)
        return b4, b5, x

    def forward(self, x):
        x = x.clone()
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)

        # _, _, x = self._features(x)
        b4, b5, x = self._features(x)
        x = self.global_pool(x); b4 = self.global_pool(b4); b5 = self.global_pool(b5)
        x = torch.flatten(x, 1); b4 = torch.flatten(b4, 1); b5 = torch.flatten(b5, 1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        logits = self.fc(x)
        
        aux_logits1 = self.aux_fc1(b4)
        aux_logits2 = self.aux_fc2(b5)
        
        return logits #, aux_logits1, aux_logits2

In [108]:
def create_model(backbone, model_file, model_type):
    if model_type == 'BengaliNet3':
        model = BengaliNet3(backbone_name=backbone)
    elif model_type == 'BengaliEfficientNet':
        model = BengaliEfficientNet(backbone_name=backbone)
    elif model_type == 'BengaliResNet':
        model = BengaliResNet(backbone_name=backbone)
    else:
        raise ValueError('wrong model type')
    #model_file = os.path.join(MODEL_DIR, ckp_name)

    assert os.path.exists(model_file)
    print('loading {}...'.format(model_file))
    model.load_state_dict(torch.load(model_file))
    
    return model

In [109]:
def create_models():
    models = []
    for backbone, model_file, model_type in ckp_list:
        model = create_model(backbone, model_file, model_type).cuda()
        if ENABLE_APEX:
            model = amp.initialize(model, None, opt_level="O1",verbosity=0)
        model.eval()
        models.append(model)
    return models

# predict

In [116]:
!ls ./model4-ckps

se_resnext50_32x4d  tf_efficientnet_b4


In [15]:
!ls /kaggle/input/model4-weights

model4_eb4_fold1_cv997705.pth	   model4_se_resnext50_fold0_224_cv9976.pth
model4_eb4_fold1_cv998144.pth	   model4_se_resnext50_fold0_224_cv9977.pth
model4_eb4_fold2_cv9976.pth	   model4_se_resnext50_fold0_224_cv9978.pth
model4_eb4_fold3_cv9971.pth	   model4_se_resnext50_fold0_224_cv998106.pth
model4_eb4_fold3_cv998185_swa.pth  model4_se_resnext50_fold4_224_cv997979.pth
model4_eb4_fold3_cv998307_swa.pth


In [16]:
!ls /kaggle/input/model3-weights

model3_se_resnext50_fold0_mixup_cutmix_224_gridmask_cv9974.pth
model3_se_resnext50_fold0_mixup_cutmix_224_gridmask_cv9976.pth
model3_se_resnext50_fold1_mixup_cutmix_224_gridmask_cv9965.pth
model3_se_resnext50_fold1_mixup_cutmix_224_gridmask_cv9970.pth
model3_se_resnext50_fold4_mixup_cutmix_224_gridmask_cv9974.pth


In [117]:
ckp_list = [
    ('se_resnext50_32x4d', './model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold0_224_cv998106.pth', 'BengaliResNet'), # lb 9886
    ('tf_efficientnet_b4', './model4-ckps/tf_efficientnet_b4/model4_eb4_fold1_cv998144.pth', 'BengaliEfficientNet'),  # lb9881
    ('tf_efficientnet_b4', './model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_cv998185_swa.pth', 'BengaliEfficientNet'),  # lb9891
    ('tf_efficientnet_b4', './model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_cv9976.pth', 'BengaliEfficientNet'),  # lb9889
    ('se_resnext50_32x4d', './model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold4_224_cv997979.pth', 'BengaliResNet') # lb 9882
    #('se_resnext50_32x4d', '/kaggle/input/model4-weights/model4_se_resnext50_fold0_224_cv9977.pth', 'BengaliResNet'), # lb 9884
    #('se_resnext50_32x4d', '/kaggle/input/model3-weights/model3_se_resnext50_fold4_mixup_cutmix_224_gridmask_cv9974.pth', 'BengaliNet3'), # lb 9877
    #('tf_efficientnet_b4', './model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_cv998307_swa.pth', 'BengaliEfficientNet')  # lb9892
]
#model_weights = [0.3, 0.3, 0.4]
#model_weights = [0.5, 0.5]
#model_weights = [1.]
model_weights = [0.2, 0.15, 0.25, 0.25, 0.15]

In [121]:
def predict(models, test_loader):
    preds0, preds1,preds2 = [], [], []
    with torch.no_grad():
        for x in test_loader:
            x = x.cuda()
            outputs0, outputs1, outputs2 = [], [], []
            for i, model in enumerate(models):
                output = model(x.clone())
                print(i, output.cpu().numpy())
                output = torch.split(output[:, :186], [168, 11, 7], dim=1)
                outputs0.append(torch.softmax(output[0], dim=1).cpu().numpy())
                outputs1.append(torch.softmax(output[1], dim=1).cpu().numpy())
                outputs2.append(torch.softmax(output[2], dim=1).cpu().numpy())
            outputs0 = np.average(outputs0, 0, weights=model_weights)
            outputs1 = np.average(outputs1, 0, weights=model_weights)
            outputs2 = np.average(outputs2, 0, weights=model_weights)
            
            preds0.append(np.argmax(outputs0, 1))
            preds1.append(np.argmax(outputs1, 1))
            preds2.append(np.argmax(outputs2, 1))
            #preds = get_predictions(outputs0, outputs1, outputs2)
            #preds0.append(preds[0])
            #preds1.append(preds[1])
            #preds2.append(preds[2])
            
            
    preds0 = np.concatenate(preds0, 0)
    preds1 = np.concatenate(preds1, 0)
    preds2 = np.concatenate(preds2, 0)
    
    return preds0, preds1, preds2

In [119]:
models = create_models()

init model4
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold0_224_cv998106.pth...
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold1_cv998144.pth...
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_cv998185_swa.pth...
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_cv9976.pth...
init model4
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold4_224_cv997979.pth...


In [94]:
models[1].training

False

In [122]:
# outputs for  model2, model1
import gc

preds0, preds1, preds2 = [], [], []

for i in range(4):
    test_loader = get_test_loader(batch_size=BATCH_SIZE, idx=i)
    p0, p1, p2 = predict(models, test_loader)
    preds0.append(p0)
    preds1.append(p1)
    preds2.append(p2)
    del test_loader
    gc.collect()
    
preds0 = np.concatenate(preds0, 0)
preds1 = np.concatenate(preds1, 0)
preds2 = np.concatenate(preds2, 0)

0 [[ -7.1862817   -7.810594    -4.8198237  ...  -0.8179925   -0.19777063
   -1.4713253 ]
 [-10.734917   -11.806205    -9.45489    ...  -1.3461176   -1.3830768
   -2.1137755 ]
 [ -6.3235493   -7.6116095   -6.1489162  ...  -0.04808476  -0.15125619
   -0.95830286]]
1 [[-2.6705704  -2.517937    1.3185531  ... -0.9167047  -0.52865475
  -1.770554  ]
 [-3.2127757  -3.5128353  -2.142935   ... -0.4858613  -1.6771361
  -1.5057656 ]
 [-1.9421563  -2.1973522  -1.5409075  ...  0.7223144  -0.49772972
  -0.83899623]]
2 [[-1.9815211  -2.3962836   1.8050307  ...  0.41863483 -0.03356555
  -1.1492089 ]
 [-3.0018673  -3.3024516  -2.4305837  ... -0.40422902 -0.93963295
  -0.61511666]
 [-1.7645657  -2.323179   -1.6421713  ...  0.4624252  -0.86281157
  -0.55969614]]
3 [[-2.8739367  -1.683599    1.6972646  ...  0.10131954  0.1774449
  -0.8141838 ]
 [-3.6307256  -2.5702085  -2.1647127  ... -0.4519977  -0.83477646
  -1.1875901 ]
 [-1.7564092  -1.4671712  -0.9772157  ...  0.57132506 -0.07662015
  -1.2050614 ]]
4

In [73]:
#models[0]

In [66]:
# outputs for  model2
import gc

preds0, preds1, preds2 = [], [], []

for i in range(4):
    test_loader = get_test_loader(batch_size=BATCH_SIZE, idx=i)
    p0, p1, p2 = predict(models, test_loader)
    preds0.append(p0)
    preds1.append(p1)
    preds2.append(p2)
    del test_loader
    gc.collect()
    
preds0 = np.concatenate(preds0, 0)
preds1 = np.concatenate(preds1, 0)
preds2 = np.concatenate(preds2, 0)

0 [[-1.9815211  -2.3962836   1.8050307  ...  0.41863483 -0.03356555
  -1.1492089 ]
 [-3.0018673  -3.3024516  -2.4305837  ... -0.40422902 -0.93963295
  -0.61511666]
 [-1.7645657  -2.323179   -1.6421713  ...  0.4624252  -0.86281157
  -0.55969614]]
0 [[-4.1630816  -3.6187284   0.17257023 ... -1.2305492  -2.069665
  -2.4557526 ]
 [-2.6643453  -3.1623561  -1.7414362  ... -0.4931431  -0.7689832
  -1.1394364 ]
 [-3.10076    -3.202427   -1.6731133  ... -0.31856272 -1.120518
  -1.7250777 ]]
0 [[-3.584045   -3.5256333  -2.3599143  ... -0.5943073   0.32120028
  -0.10669224]
 [-2.1545324  -3.073289   -1.950397   ...  0.63448155 -0.33607846
  -0.72417825]
 [-2.5658965  -3.059737   -1.3301737  ...  0.04588587  2.2594516
  -1.2172878 ]]
0 [[-1.1741327  -2.6963952  -1.2545342  ... -0.1334528  -0.43743262
  -1.0867112 ]
 [-3.4942737  -3.3348353  -0.83675057 ... -0.29198718 -0.00458399
   1.2165972 ]
 [-2.3303752  -3.152591   -1.2400956  ... -0.07229826 -0.74317056
  -0.65666854]]


In [62]:
# outputs for  model1
import gc

preds0, preds1, preds2 = [], [], []

for i in range(4):
    test_loader = get_test_loader(batch_size=BATCH_SIZE, idx=i)
    p0, p1, p2 = predict(models, test_loader)
    preds0.append(p0)
    preds1.append(p1)
    preds2.append(p2)
    del test_loader
    gc.collect()
    
preds0 = np.concatenate(preds0, 0)
preds1 = np.concatenate(preds1, 0)
preds2 = np.concatenate(preds2, 0)

0 [[-2.8739367  -1.683599    1.6972646  ...  0.10131954  0.1774449
  -0.8141838 ]
 [-3.6307256  -2.5702085  -2.1647127  ... -0.4519977  -0.83477646
  -1.1875901 ]
 [-1.7564092  -1.4671712  -0.9772157  ...  0.57132506 -0.07662015
  -1.2050614 ]]
0 [[-3.599402   -2.3611097   0.18198377 ...  0.6890877  -1.0891328
  -2.8407648 ]
 [-1.1345098  -1.221085   -2.1812642  ...  0.10280535 -0.9225764
  -0.7418132 ]
 [-3.4843976  -2.7565088  -2.2552967  ... -0.06568424 -1.5142089
  -2.015525  ]]
0 [[-1.8734746  -2.8732915  -0.7773968  ...  0.49059683  1.9874496
  -0.41183412]
 [-2.6842942  -2.7795134  -1.7300556  ...  0.9316405  -0.663863
  -0.82204324]
 [-2.8197932  -2.6598098  -1.4742347  ...  1.1284032   3.1524918
  -0.7613689 ]]
0 [[-2.9233952  -3.909344   -1.8719174  ... -1.3790128  -0.7894496
  -2.395862  ]
 [-2.4501138  -2.974061   -2.4754174  ... -0.73111653 -0.94749844
   3.2835867 ]
 [-3.427409   -4.8262725  -2.586266   ... -1.0050374  -2.1204722
  -2.352977  ]]


In [59]:
# outputs for 2 models
import gc

preds0, preds1, preds2 = [], [], []

for i in range(4):
    test_loader = get_test_loader(batch_size=BATCH_SIZE, idx=i)
    p0, p1, p2 = predict(models, test_loader)
    preds0.append(p0)
    preds1.append(p1)
    preds2.append(p2)
    del test_loader
    gc.collect()
    
preds0 = np.concatenate(preds0, 0)
preds1 = np.concatenate(preds1, 0)
preds2 = np.concatenate(preds2, 0)

0 [[-2.8739367  -1.683599    1.6972646  ...  0.10131954  0.1774449
  -0.8141838 ]
 [-3.6307256  -2.5702085  -2.1647127  ... -0.4519977  -0.83477646
  -1.1875901 ]
 [-1.7564092  -1.4671712  -0.9772157  ...  0.57132506 -0.07662015
  -1.2050614 ]]
1 [[-1138.3181    -1200.3625     -637.5423    ...  -443.79025
   -169.7639     -324.2416   ]
 [ -417.66345    -511.23328    -278.062     ...   -86.338554
    -67.46889       2.6422465]
 [ -857.3012    -1186.0989     -561.0034    ...   -76.796364
   -310.8326     -187.83319  ]]
0 [[-3.599402   -2.3611097   0.18198377 ...  0.6890877  -1.0891328
  -2.8407648 ]
 [-1.1345098  -1.221085   -2.1812642  ...  0.10280535 -0.9225764
  -0.7418132 ]
 [-3.4843976  -2.7565088  -2.2552967  ... -0.06568424 -1.5142089
  -2.015525  ]]
1 [[ -966.835    -1082.6678    -423.4504   ...  -558.5766    -586.00464
   -331.2461  ]
 [ -237.67717   -386.17957   -189.19699  ...   -34.76139    -51.678402
    -30.83259 ]
 [ -113.84788   -155.35092    -84.28233  ...   -22.731848  

In [57]:
#preds2

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [115]:
row_id = []
target = []
for i in tqdm(range(len(preds0))):
    row_id += [f'Test_{i}_grapheme_root', f'Test_{i}_vowel_diacritic',
               f'Test_{i}_consonant_diacritic']
    target += [preds0[i], preds1[i], preds2[i]]
submission_df = pd.DataFrame({'row_id': row_id, 'target': target})
#submission_df.to_csv('submission.csv', index=False)
submission_df.head()

100%|██████████| 12/12 [00:00<00:00, 66225.85it/s]


Unnamed: 0,row_id,target
0,Test_0_grapheme_root,3
1,Test_0_vowel_diacritic,0
2,Test_0_consonant_diacritic,0
3,Test_1_grapheme_root,93
4,Test_1_vowel_diacritic,2


In [35]:
submission_df.to_csv('submission.csv', index=False)

In [36]:
submission_df

Unnamed: 0,row_id,target
0,Test_0_grapheme_root,148
1,Test_0_vowel_diacritic,0
2,Test_0_consonant_diacritic,0
3,Test_1_grapheme_root,107
4,Test_1_vowel_diacritic,1
5,Test_1_consonant_diacritic,0
6,Test_2_grapheme_root,107
7,Test_2_vowel_diacritic,1
8,Test_2_consonant_diacritic,0
9,Test_3_grapheme_root,89
