In [3]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet

In [2]:
!ls /mnt/chicm/data/bengali

bengaliai-cv19.zip	   test_image_data_3.parquet
class_map.csv		   train.csv
sample_submission.csv	   train_image_data_0.parquet
test.csv		   train_image_data_1.parquet
test_image_data_0.parquet  train_image_data_2.parquet
test_image_data_1.parquet  train_image_data_3.parquet
test_image_data_2.parquet


In [3]:
#!ls /home/chec/data/bengali

In [8]:
DATA_DIR = '/mnt/chicm/data/bengali'

In [9]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [10]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [11]:
import albumentations as albu

def get_train_augs(p=1.):
    return albu.Compose([
        #albu.HorizontalFlip(.5),
        albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=0.5 ),
        albu.Blur(blur_limit=3, p=0.3),
        albu.OpticalDistortion(p=0.3),
        albu.GaussNoise(p=0.3)
        #albu.GridDistortion(p=.33),
        #albu.HueSaturationValue(p=.33) # not for grey scale
    ], p=p)

In [12]:
#plt.imshow(x)

In [13]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

HEIGHT = 137
WIDTH = 236

class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            img = augs(image=img)['image']
        
        img = np.expand_dims(img, axis=-1)
        
        img = transforms.functional.to_tensor(img)
        #img = transforms.functional.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        
        if self.test_mode:
            return img
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic])

    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
    train_df = shuffle(train_df, random_state=1234)
    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [10]:
#train_loader, val_loader = get_train_val_loaders(dev_mode=True)

# model

In [11]:
#import pretrainedmodels

In [12]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [13]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [14]:
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained=False).cuda()


In [15]:
#model.features(torch.randn((2, 3, 137, 236)).cuda()).size()

In [16]:
#model.last_linear.in_features

In [14]:
class BengaliNet_1(nn.Module):
    def __init__(self, backbone_name):
        super(BengaliNet, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant
        
        self.conv0 = nn.Conv2d(1, 3, kernel_size=1, stride=1, padding=0)
        
        if self.backbone_name.startswith('efficient'):
            self.backbone = EfficientNet.from_pretrained(self.backbone_name)
            self.fc = nn.Linear(self.backbone._fc.in_features, self.num_classes)
        else:
            self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained='imagenet')
            self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
    def logits(self, x):
        x = self.avg_pool(x)
        x = F.dropout2d(x, 0.2, self.training)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
    def forward(self, x):
        x = self.conv0(x)
        #print(x.size())
        if self.backbone_name.startswith('efficient'):
            x = self.backbone.extract_features(x)
        else:
            x = self.backbone.features(x)
        x = self.logits(x)

        return x

In [15]:
class BengaliNet(nn.Module):
    def __init__(self, backbone_name):
        super(BengaliNet, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant
        
        #self.conv0 = nn.Conv2d(1, 3, kernel_size=1, stride=1, padding=0)
        
        if self.backbone_name.startswith('efficient'):
            self.backbone = EfficientNet.from_pretrained(self.backbone_name)
            self.fc = nn.Linear(self.backbone._fc.in_features, self.num_classes)
        else:
            self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained='imagenet')
            self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
        self.fix_input_layer()
        
    def fix_input_layer(self):
        if self.backbone_name in ['se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnet50', 'senet154', 'se_resnet152', 'nasnetmobile', 'mobilenet', 'nasnetalarge']:
            #self.backbone = eval(backbone_name)()
            #print(self.backbone.layer0.conv1)
            w = self.backbone.layer0.conv1.weight.data
            self.backbone.layer0.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
            #self.backbone.layer0.conv1.weight = torch.nn.Parameter(torch.cat((w, w[:, 2, :, :].unsqueeze(1)), dim=1))
            self.backbone.layer0.conv1.weight = torch.nn.Parameter(w[:, 0, :, :].unsqueeze(1))
        
    def logits(self, x):
        x = self.avg_pool(x)
        x = F.dropout2d(x, 0.2, self.training)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
    def forward(self, x):
        #x = self.conv0(x)
        #print(x.size())
        if self.backbone_name.startswith('efficient'):
            x = self.backbone.extract_features(x)
        else:
            x = self.backbone.features(x)
        x = self.logits(x)

        return x

In [16]:
MODEL_DIR = './models'
def create_model(args):
    model = BengaliNet(backbone_name=args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if args.predict and (not os.path.exists(model_file)):
        raise AttributeError('model file does not exist: {}'.format(model_file))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [20]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [21]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [22]:
round(1/9, 6)

0.111111

In [17]:
import numpy as np
import sklearn.metrics
import torch


def macro_recall(pred_y, y, n_grapheme=168, n_vowel=11, n_consonant=7):
    pred_y = torch.split(pred_y, [n_grapheme, n_vowel, n_consonant], dim=1)
    pred_labels = [torch.argmax(py, dim=1).cpu().numpy() for py in pred_y]

    #y = y.cpu().numpy()
    # pred_y = [p.cpu().numpy() for p in pred_y]

    recall_grapheme = sklearn.metrics.recall_score(pred_labels[0], y_grapheme, average='macro')
    recall_vowel = sklearn.metrics.recall_score(pred_labels[1], y_vowel, average='macro')
    recall_consonant = sklearn.metrics.recall_score(pred_labels[2], y_consonant, average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_score = np.average(scores, weights=[2, 1, 1])
    # print(f'recall: grapheme {recall_grapheme}, vowel {recall_vowel}, consonant {recall_consonant}, '
    #       f'total {final_score}, y {y.shape}')
    return final_score

def calc_metrics(preds0, preds1, preds2, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    
    
    return metrics

In [18]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    
    return loss0 + loss1 + loss2 #, loss0.item(), loss1.item(), loss2.item()

In [1]:
!ls

bengali-ensemble.ipynb
bengali-inference.ipynb
eda.ipynb
efficient-net.ipynb
Inference.ipynb
model1-efficient-b5-fold1-cv9841-lb9725.ipynb
model1-efficient-b5-fold1-focal_loss.ipynb
model1-se_resnext50-5fold-0.ipynb
model2-efficientnet-b5-5fold-1.ipynb
model2-seresnext50-5fold-0.ipynb
model2-seresnext50-5fold-0-post-process.ipynb
model2-seresnext50-5fold-2.ipynb
models
over9000
post_process.pth
single-model-apex-cutmix-5fold-0-se154.ipynb
single-model-apex-cutmix-5fold-1-efficient-b6.ipynb
single-model-apex-cutmix-5fold-4-seresnext101.ipynb
single-model-apex-cutmix-efficient-densenet.ipynb
single-model-apex-cutmix-lb9699-cv9848-224-mixup.ipynb
single-model-apex-cutmix-lb9699-cv9848.ipynb
single_model.ipynb
single_model_lb9680_cv9828.ipynb
tmpx


In [52]:
post_process_dict = torch.load('post_process.pth')
vowel_dict = post_process_dict['vowel_dict']
consonant_dict = post_process_dict['consonant_dict']

In [53]:
consonant_dict

{0: tensor([], dtype=torch.int64),
 1: tensor([  0,   1,   2,   5,   7,   8,  10,  11,  12,  14,  15,  16,  17,  18,
          19,  20,  21,  24,  25,  26,  27,  28,  30,  31,  32,  33,  34,  35,
          36,  37,  39,  40,  41,  43,  44,  45,  46,  47,  49,  50,  51,  52,
          53,  54,  56,  57,  58,  59,  60,  61,  62,  63,  65,  66,  67,  68,
          69,  70,  71,  73,  74,  75,  76,  77,  78,  80,  81,  82,  83,  84,
          85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  97,  98,  99,
         100, 101, 102, 104, 105, 106, 108, 109, 110, 111, 112, 114, 115, 116,
         117, 118, 119, 120, 121, 122, 124, 125, 126, 127, 128, 129, 130, 131,
         132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
         146, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 160, 161,
         162, 163, 164, 165, 166, 167]),
 2: tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  14,
          15,  16,  17,  18,  19,  20,  24,  25, 

In [76]:
def get_predictions(output0, output1, output2):
    #outputs = torch.split(outputs, [168, 11, 7], dim=1)
    output1 = torch.softmax(output1, dim=1)   
    output2 = torch.softmax(output2, dim=1)        
    
    score_vowel, preds_vowel = torch.max(output1, dim=1)
    score_consonant, preds_consonant = torch.max(output2, dim=1)
    
    for i in range(output0.size(0)):
        #if preds_vowel[i].item() != 0:
        if score_vowel[i].item() > 0.3:
            output0[i, vowel_dict[preds_vowel[i].item()]] = -100000.
        #if preds_consonant[i].item() != 0:
        if score_consonant[i].item() > 0.3:
            output0[i, consonant_dict[preds_consonant[i].item()]] = -100000.
    preds_grapheme = torch.max(output0, dim=1)[1]
    
    return preds_grapheme, preds_vowel, preds_consonant

In [77]:
from tqdm import tqdm

In [78]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2 = 0., 0., 0.
    preds0, preds1,preds2 = [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in tqdm(val_loader):
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs = model(x)
            outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
            preds = get_predictions(outputs[0], outputs[1], outputs[2])
            preds0.append(preds[0])
            preds1.append(preds[1])
            preds2.append(preds[2])
            
            #preds0.append(torch.max(split_outputs[0], dim=1)[1])
            #preds1.append(torch.max(split_outputs[1], dim=1)[1])
            #preds2.append(torch.max(split_outputs[2], dim=1)[1])

            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    
    return metrics
            

In [79]:
print(validate(model, val_loader))

100%|██████████| 40/40 [00:45<00:00,  1.13s/it]

{'recall': 0.985201, 'recall_grapheme': 0.977361, 'recall_vowel': 0.992751, 'recall_consonant': 0.993332, 'acc_grapheme': 0.977736, 'acc_vowel': 0.993713, 'acc_consonant': 0.993042, 'loss_grapheme': 201.399638, 'loss_vowel': 0.08522, 'loss_consonant': 0.065171}





In [63]:
print(validate(model, val_loader))

100%|██████████| 40/40 [00:43<00:00,  1.09s/it]

{'recall': 0.985201, 'recall_grapheme': 0.977361, 'recall_vowel': 0.992751, 'recall_consonant': 0.993332, 'acc_grapheme': 0.977736, 'acc_vowel': 0.993713, 'acc_consonant': 0.993042, 'loss_grapheme': 201.399638, 'loss_vowel': 0.08522, 'loss_consonant': 0.065171}





In [57]:
print(validate(model, val_loader))

100%|██████████| 40/40 [00:43<00:00,  1.08s/it]

{'recall': 0.985159, 'recall_grapheme': 0.977276, 'recall_vowel': 0.992751, 'recall_consonant': 0.993332, 'acc_grapheme': 0.977711, 'acc_vowel': 0.993713, 'acc_consonant': 0.993042, 'loss_grapheme': 198.914883, 'loss_vowel': 0.08522, 'loss_consonant': 0.065171}





In [23]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [24]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [25]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [26]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [27]:
np.random.random()

0.29251408144274393

In [28]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam

In [29]:
def train(args):
    global model

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    elif args.optim == 'RAdam':
        optimizer = RAdam(model.parameters(), lr=args.lr)
    elif args.optim == 'Over9000':
        optimizer = Over9000(model.parameters(), lr=args.lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0001)

    if args.lrs == 'plateau':
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
        
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    best_metrics = 0.
    best_key = 'recall'
    
    val_metrics = validate(model, val_loader)
    print(val_metrics)
    best_metrics = val_metrics[best_key]
    
    model.train()
    #optimizer.zero_grad()

    #if args.lrs == 'plateau':
    #    lr_scheduler.step(best_metrics)
    #else:
    #    lr_scheduler.step()
    train_iter = 0

    for epoch in range(args.num_epochs):
        train_loss = 0

        current_lr = get_lrs(optimizer)
        bg = time.time()
        for batch_idx, (img, targets) in enumerate(train_loader):
            train_iter += 1
            img, targets  = img.cuda(), targets.cuda()
            #do_mixup = False #(np.random.random() < 0.4)
            
            #if do_mixup:
            #    img, targets = mixup(img, targets)
            batch_size = img.size(0)
          
            
            
            #if do_mixup:
            #    loss = mixup_criterion(outputs, targets)
            #else:
            #    loss = criterion(outputs, targets)
            r = np.random.rand()
            #if args.beta > 0 and r < args.cutmix_prob:
            if r < 0.9:
                # generate mixed sample
                lam = np.random.beta(args.beta, args.beta)
                rand_index = torch.randperm(img.size()[0]).cuda()
                target_a = targets
                target_b = targets[rand_index]
                bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
                img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
                # adjust lambda to exactly match pixel ratio
                lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
                # compute output
                outputs = model(img)
                loss = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            else:
                #img, targets = mixup(img, targets)
                outputs = model(img)
                #loss = mixup_criterion(outputs, targets)
                loss = criterion(outputs, targets)
            
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            
            #loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            
            #if batch_idx % 4 == 0:
            #    optimizer.step()
            #    optimizer.zero_grad()

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
                loss.item(), train_loss/(batch_idx+1)), end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                #outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
                #preds0 = (torch.max(outputs[0], dim=1)[1]).cpu().numpy()
                #preds1 = (torch.max(outputs[1], dim=1)[1]).cpu().numpy()
                #preds2 = (torch.max(outputs[2], dim=1)[1]).cpu().numpy()
                #train_metrics = calc_metrics(preds0, preds1, preds2, targets.cpu().numpy())
                #print('train:', train_metrics)
                #save_model(model, model_file+'_latest')
                val_metrics = validate(model, val_loader)
                print('\nval:', val_metrics)
                
                if val_metrics[best_key] > best_metrics:
                    best_metrics = val_metrics[best_key]
                    save_model(model, model_file)
                    print('** saved')
                
                model.train()
                
                if args.lrs == 'plateau':
                    lr_scheduler.step(best_metrics)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
        
    

In [30]:
args = Namespace()
args.backbone = 'se_resnext50_32x4d'
args.ckp_name = 'model2_se_resnext50_fold0.pth'
args.predict = False
args.optim = 'RAdam'
args.lr = 2e-5
args.lrs = 'cosine'
args.t_max = 15
args.factor = 0.6
args.patience = 0
args.min_lr = 1e-6
args.iter_val = 200
args.num_epochs = 100000
args.batch_size = 1024
args.val_batch_size = 1024

args.beta = 1.0
args.cutmix_prob = 0.5

In [31]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=0)

(200840, 5)
(200840, 32332)
(160596, 5) (40244, 5)


In [32]:
model, model_file = create_model(args)
#if torch.cuda.device_count() > 1:
#    model = nn.DataParallel(model)
model = model.cuda()


model file: ./models/se_resnext50_32x4d/model2_se_resnext50_fold0.pth, exist: True
loading ./models/se_resnext50_32x4d/model2_se_resnext50_fold0.pth...


In [33]:
print(validate(model, val_loader))

{'recall': 0.984116, 'recall_grapheme': 0.97519, 'recall_vowel': 0.992751, 'recall_consonant': 0.993332, 'acc_grapheme': 0.977313, 'acc_vowel': 0.993713, 'acc_consonant': 0.993042, 'loss_grapheme': 0.142988, 'loss_vowel': 0.08522, 'loss_consonant': 0.065171}


In [36]:
train(args)

{'recall': 0.983745, 'recall_grapheme': 0.974981, 'recall_vowel': 0.992261, 'recall_consonant': 0.992757, 'acc_grapheme': 0.976046, 'acc_vowel': 0.993167, 'acc_consonant': 0.992297, 'loss_grapheme': 0.103041, 'loss_vowel': 0.042857, 'loss_consonant': 0.038426}
    1 | 0.000020 | 045056/160596 | 2.0154 | 2.4724 |
val: {'recall': 0.982875, 'recall_grapheme': 0.973581, 'recall_vowel': 0.991425, 'recall_consonant': 0.992913, 'acc_grapheme': 0.975375, 'acc_vowel': 0.992769, 'acc_consonant': 0.991676, 'loss_grapheme': 0.185012, 'loss_vowel': 0.117962, 'loss_consonant': 0.087933}




    2 | 0.000020 | 090112/160596 | 4.5338 | 2.7222 |
val: {'recall': 0.98232, 'recall_grapheme': 0.972734, 'recall_vowel': 0.990797, 'recall_consonant': 0.993016, 'acc_grapheme': 0.974679, 'acc_vowel': 0.992396, 'acc_consonant': 0.991527, 'loss_grapheme': 0.230635, 'loss_vowel': 0.156667, 'loss_consonant': 0.114001}
    3 | 0.000019 | 135168/160596 | 4.5807 | 2.9073 |
val: {'recall': 0.981842, 'recall_grapheme': 0.971744, 'recall_vowel': 0.991033, 'recall_consonant': 0.992845, 'acc_grapheme': 0.974058, 'acc_vowel': 0.992446, 'acc_consonant': 0.991328, 'loss_grapheme': 0.296629, 'loss_vowel': 0.20651, 'loss_consonant': 0.144226}
    5 | 0.000018 | 020480/160596 | 0.0051 | 1.9522 |
val: {'recall': 0.983169, 'recall_grapheme': 0.974084, 'recall_vowel': 0.991507, 'recall_consonant': 0.993001, 'acc_grapheme': 0.975499, 'acc_vowel': 0.992695, 'acc_consonant': 0.991899, 'loss_grapheme': 0.12782, 'loss_vowel': 0.07325, 'loss_consonant': 0.057126}
    6 | 0.000017 | 065536/160596 | 4.3425 | 2.9

   35 | 0.000018 | 143360/160596 | 3.1996 | 2.6377 |
val: {'recall': 0.982709, 'recall_grapheme': 0.973953, 'recall_vowel': 0.991048, 'recall_consonant': 0.991884, 'acc_grapheme': 0.975226, 'acc_vowel': 0.992869, 'acc_consonant': 0.991502, 'loss_grapheme': 0.230154, 'loss_vowel': 0.15357, 'loss_consonant': 0.111495}
   37 | 0.000019 | 028672/160596 | 3.2622 | 3.1381 |
val: {'recall': 0.982646, 'recall_grapheme': 0.973213, 'recall_vowel': 0.991256, 'recall_consonant': 0.992902, 'acc_grapheme': 0.974754, 'acc_vowel': 0.992695, 'acc_consonant': 0.991701, 'loss_grapheme': 0.22379, 'loss_vowel': 0.153706, 'loss_consonant': 0.108864}
   38 | 0.000020 | 073728/160596 | 0.0069 | 2.6257 |
val: {'recall': 0.983266, 'recall_grapheme': 0.97406, 'recall_vowel': 0.991862, 'recall_consonant': 0.993083, 'acc_grapheme': 0.975673, 'acc_vowel': 0.992968, 'acc_consonant': 0.992173, 'loss_grapheme': 0.149001, 'loss_vowel': 0.090988, 'loss_consonant': 0.069694}
   39 | 0.000020 | 118784/160596 | 4.1178 | 2.

   69 | 0.000011 | 036864/160596 | 3.1474 | 3.0666 |
val: {'recall': 0.982213, 'recall_grapheme': 0.973034, 'recall_vowel': 0.991179, 'recall_consonant': 0.991604, 'acc_grapheme': 0.975127, 'acc_vowel': 0.992521, 'acc_consonant': 0.9918, 'loss_grapheme': 0.213629, 'loss_vowel': 0.144392, 'loss_consonant': 0.103826}
   70 | 0.000013 | 081920/160596 | 2.9054 | 2.7230 |
val: {'recall': 0.982278, 'recall_grapheme': 0.972957, 'recall_vowel': 0.991503, 'recall_consonant': 0.991695, 'acc_grapheme': 0.975127, 'acc_vowel': 0.992695, 'acc_consonant': 0.991825, 'loss_grapheme': 0.208335, 'loss_vowel': 0.140419, 'loss_consonant': 0.101167}
   71 | 0.000015 | 126976/160596 | 2.8766 | 2.5745 |
val: {'recall': 0.982132, 'recall_grapheme': 0.972578, 'recall_vowel': 0.991637, 'recall_consonant': 0.991734, 'acc_grapheme': 0.974754, 'acc_vowel': 0.992645, 'acc_consonant': 0.991353, 'loss_grapheme': 0.305234, 'loss_vowel': 0.209053, 'loss_consonant': 0.146568}
   73 | 0.000017 | 012288/160596 | 4.4477 | 3

  102 | 0.000004 | 090112/160596 | 0.1357 | 2.3549 |
val: {'recall': 0.982639, 'recall_grapheme': 0.973928, 'recall_vowel': 0.991036, 'recall_consonant': 0.991661, 'acc_grapheme': 0.976021, 'acc_vowel': 0.992719, 'acc_consonant': 0.992049, 'loss_grapheme': 0.191546, 'loss_vowel': 0.127376, 'loss_consonant': 0.092514}
  103 | 0.000006 | 135168/160596 | 0.2224 | 2.8519 |
val: {'recall': 0.982627, 'recall_grapheme': 0.973305, 'recall_vowel': 0.991825, 'recall_consonant': 0.992074, 'acc_grapheme': 0.975748, 'acc_vowel': 0.993067, 'acc_consonant': 0.992098, 'loss_grapheme': 0.204663, 'loss_vowel': 0.142699, 'loss_consonant': 0.101941}
  105 | 0.000008 | 020480/160596 | 0.4235 | 2.6806 |
val: {'recall': 0.982826, 'recall_grapheme': 0.973917, 'recall_vowel': 0.991522, 'recall_consonant': 0.991948, 'acc_grapheme': 0.975922, 'acc_vowel': 0.992993, 'acc_consonant': 0.991949, 'loss_grapheme': 0.228592, 'loss_vowel': 0.152386, 'loss_consonant': 0.111011}
  106 | 0.000010 | 065536/160596 | 3.0622 |

  135 | 0.000001 | 143360/160596 | 3.2907 | 2.5674 |
val: {'recall': 0.98284, 'recall_grapheme': 0.97405, 'recall_vowel': 0.991278, 'recall_consonant': 0.991981, 'acc_grapheme': 0.976543, 'acc_vowel': 0.992893, 'acc_consonant': 0.992322, 'loss_grapheme': 0.206174, 'loss_vowel': 0.138541, 'loss_consonant': 0.098892}
  137 | 0.000001 | 028672/160596 | 4.4734 | 2.7133 |
val: {'recall': 0.982423, 'recall_grapheme': 0.973162, 'recall_vowel': 0.991304, 'recall_consonant': 0.992065, 'acc_grapheme': 0.975922, 'acc_vowel': 0.992893, 'acc_consonant': 0.992049, 'loss_grapheme': 0.252126, 'loss_vowel': 0.168241, 'loss_consonant': 0.118903}
  138 | 0.000002 | 073728/160596 | 3.8734 | 2.6939 |
val: {'recall': 0.98244, 'recall_grapheme': 0.973352, 'recall_vowel': 0.991124, 'recall_consonant': 0.99193, 'acc_grapheme': 0.975798, 'acc_vowel': 0.992695, 'acc_consonant': 0.99185, 'loss_grapheme': 0.285952, 'loss_vowel': 0.196876, 'loss_consonant': 0.134718}
  139 | 0.000003 | 118784/160596 | 1.7805 | 2.59

  169 | 0.000004 | 036864/160596 | 3.2229 | 2.4405 |
val: {'recall': 0.982978, 'recall_grapheme': 0.9741, 'recall_vowel': 0.991696, 'recall_consonant': 0.992018, 'acc_grapheme': 0.976642, 'acc_vowel': 0.993365, 'acc_consonant': 0.992595, 'loss_grapheme': 0.130097, 'loss_vowel': 0.075256, 'loss_consonant': 0.059096}
  170 | 0.000003 | 081920/160596 | 2.7316 | 3.0466 |
val: {'recall': 0.982551, 'recall_grapheme': 0.973436, 'recall_vowel': 0.991226, 'recall_consonant': 0.992106, 'acc_grapheme': 0.976071, 'acc_vowel': 0.993042, 'acc_consonant': 0.992098, 'loss_grapheme': 0.235771, 'loss_vowel': 0.153329, 'loss_consonant': 0.11236}
  171 | 0.000002 | 126976/160596 | 3.0746 | 2.7222 |
val: {'recall': 0.983051, 'recall_grapheme': 0.97413, 'recall_vowel': 0.991744, 'recall_consonant': 0.992199, 'acc_grapheme': 0.976493, 'acc_vowel': 0.993216, 'acc_consonant': 0.992521, 'loss_grapheme': 0.174377, 'loss_vowel': 0.110486, 'loss_consonant': 0.082632}
  173 | 0.000001 | 012288/160596 | 0.0064 | 2.1

  202 | 0.000011 | 090112/160596 | 3.6882 | 2.7306 |
val: {'recall': 0.983333, 'recall_grapheme': 0.974253, 'recall_vowel': 0.99157, 'recall_consonant': 0.993255, 'acc_grapheme': 0.976642, 'acc_vowel': 0.992993, 'acc_consonant': 0.992322, 'loss_grapheme': 0.210327, 'loss_vowel': 0.139612, 'loss_consonant': 0.10247}
  203 | 0.000010 | 135168/160596 | 4.1768 | 2.4707 |
val: {'recall': 0.983521, 'recall_grapheme': 0.974365, 'recall_vowel': 0.992202, 'recall_consonant': 0.993152, 'acc_grapheme': 0.976941, 'acc_vowel': 0.993515, 'acc_consonant': 0.992695, 'loss_grapheme': 0.153233, 'loss_vowel': 0.094962, 'loss_consonant': 0.07231}
  205 | 0.000008 | 020480/160596 | 2.6844 | 2.6417 |
val: {'recall': 0.983164, 'recall_grapheme': 0.973737, 'recall_vowel': 0.991782, 'recall_consonant': 0.9934, 'acc_grapheme': 0.976593, 'acc_vowel': 0.993067, 'acc_consonant': 0.992421, 'loss_grapheme': 0.173772, 'loss_vowel': 0.113096, 'loss_consonant': 0.084655}
  206 | 0.000006 | 065536/160596 | 3.0368 | 2.67

  235 | 0.000018 | 143360/160596 | 0.0074 | 2.5929 |
val: {'recall': 0.9839, 'recall_grapheme': 0.975784, 'recall_vowel': 0.992169, 'recall_consonant': 0.991862, 'acc_grapheme': 0.977388, 'acc_vowel': 0.993713, 'acc_consonant': 0.992794, 'loss_grapheme': 0.149174, 'loss_vowel': 0.088149, 'loss_consonant': 0.067614}
  237 | 0.000017 | 028672/160596 | 3.4314 | 2.5235 |
val: {'recall': 0.983003, 'recall_grapheme': 0.974027, 'recall_vowel': 0.992122, 'recall_consonant': 0.991835, 'acc_grapheme': 0.976245, 'acc_vowel': 0.993515, 'acc_consonant': 0.99257, 'loss_grapheme': 0.184502, 'loss_vowel': 0.119877, 'loss_consonant': 0.088966}
  238 | 0.000015 | 073728/160596 | 3.7852 | 2.2145 |
val: {'recall': 0.983339, 'recall_grapheme': 0.97454, 'recall_vowel': 0.992508, 'recall_consonant': 0.991768, 'acc_grapheme': 0.97699, 'acc_vowel': 0.993862, 'acc_consonant': 0.992869, 'loss_grapheme': 0.142637, 'loss_vowel': 0.086487, 'loss_consonant': 0.066051}
  239 | 0.000013 | 118784/160596 | 4.4240 | 2.77

  269 | 0.000020 | 036864/160596 | 3.3534 | 2.2221 |
val: {'recall': 0.98411, 'recall_grapheme': 0.975018, 'recall_vowel': 0.993078, 'recall_consonant': 0.993327, 'acc_grapheme': 0.977065, 'acc_vowel': 0.993912, 'acc_consonant': 0.992943, 'loss_grapheme': 0.135213, 'loss_vowel': 0.076796, 'loss_consonant': 0.061474}
  270 | 0.000020 | 081920/160596 | 3.8405 | 2.5556 |
val: {'recall': 0.98337, 'recall_grapheme': 0.97443, 'recall_vowel': 0.992546, 'recall_consonant': 0.992074, 'acc_grapheme': 0.976518, 'acc_vowel': 0.99349, 'acc_consonant': 0.992595, 'loss_grapheme': 0.177909, 'loss_vowel': 0.11266, 'loss_consonant': 0.086049}
  271 | 0.000020 | 126976/160596 | 0.0054 | 2.4768 |
val: {'recall': 0.983062, 'recall_grapheme': 0.973783, 'recall_vowel': 0.993131, 'recall_consonant': 0.991551, 'acc_grapheme': 0.976618, 'acc_vowel': 0.993838, 'acc_consonant': 0.992794, 'loss_grapheme': 0.145273, 'loss_vowel': 0.086979, 'loss_consonant': 0.06694}
  273 | 0.000019 | 012288/160596 | 3.2754 | 2.751

  302 | 0.000015 | 090112/160596 | 2.6312 | 2.7341 |
val: {'recall': 0.98289, 'recall_grapheme': 0.973678, 'recall_vowel': 0.992302, 'recall_consonant': 0.991904, 'acc_grapheme': 0.976493, 'acc_vowel': 0.993266, 'acc_consonant': 0.992446, 'loss_grapheme': 0.178524, 'loss_vowel': 0.109209, 'loss_consonant': 0.086438}
  303 | 0.000017 | 135168/160596 | 1.2320 | 2.5044 |
val: {'recall': 0.983363, 'recall_grapheme': 0.97375, 'recall_vowel': 0.992526, 'recall_consonant': 0.993426, 'acc_grapheme': 0.976444, 'acc_vowel': 0.993689, 'acc_consonant': 0.992496, 'loss_grapheme': 0.216388, 'loss_vowel': 0.137286, 'loss_consonant': 0.099744}
  305 | 0.000018 | 020480/160596 | 0.9559 | 2.8179 |
val: {'recall': 0.983209, 'recall_grapheme': 0.973712, 'recall_vowel': 0.992287, 'recall_consonant': 0.993125, 'acc_grapheme': 0.976469, 'acc_vowel': 0.993539, 'acc_consonant': 0.992695, 'loss_grapheme': 0.159226, 'loss_vowel': 0.097191, 'loss_consonant': 0.074482}
  306 | 0.000019 | 065536/160596 | 0.9294 | 2

KeyboardInterrupt: 

In [None]:
train(args)

In [None]:
#save_model(model, model_file)

In [None]:
del model