In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet

In [2]:
import math
import torch
from torch.optim.optimizer import Optimizer, required

class RAdam(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        self.buffer = [[None, None, None] for ind in range(10)]
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                state['step'] += 1
                buffered = self.buffer[int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    N_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        step_size = 1.0 / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)

                # more conservative since it's an approximated value
                if N_sma >= 5:            
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-step_size * group['lr'], exp_avg, denom)
                else:
                    p_data_fp32.add_(-step_size * group['lr'], exp_avg)

                p.data.copy_(p_data_fp32)

        return loss

class PlainRAdam(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)

        super(PlainRAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(PlainRAdam, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                state['step'] += 1
                beta2_t = beta2 ** state['step']
                N_sma_max = 2 / (1 - beta2) - 1
                N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)

                # more conservative since it's an approximated value
                if N_sma >= 5:                    
                    step_size = group['lr'] * math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_data_fp32.addcdiv_(-step_size, exp_avg, denom)
                else:
                    step_size = group['lr'] / (1 - beta1 ** state['step'])
                    p_data_fp32.add_(-step_size, exp_avg)

                p.data.copy_(p_data_fp32)

        return loss


class AdamW(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, warmup = 0):
        defaults = dict(lr=lr, betas=betas, eps=eps,
                        weight_decay=weight_decay, warmup = warmup)
        super(AdamW, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(AdamW, self).__setstate__(state)

    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                exp_avg.mul_(beta1).add_(1 - beta1, grad)

                denom = exp_avg_sq.sqrt().add_(group['eps'])
                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                
                if group['warmup'] > state['step']:
                    scheduled_lr = 1e-8 + state['step'] * group['lr'] / group['warmup']
                else:
                    scheduled_lr = group['lr']

                step_size = scheduled_lr * math.sqrt(bias_correction2) / bias_correction1
                
                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * scheduled_lr, p_data_fp32)

                p_data_fp32.addcdiv_(-step_size, exp_avg, denom)

                p.data.copy_(p_data_fp32)

        return loss

In [3]:
'''
!pip install opencv-python
!pip install fastparquet
!pip install pyarrow
!pip install snappy
!conda install python-snappy -y
'''

'\n!pip install opencv-python\n!pip install fastparquet\n!pip install pyarrow\n!pip install snappy\n!conda install python-snappy -y\n'

In [4]:
!ls /mnt/chicm/data/bengali

ls: cannot access '/mnt/chicm/data/bengali': No such file or directory


In [5]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [6]:
DATA_DIR = '/home/chec/data/bengali'

In [7]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [8]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [9]:
import albumentations as albu

def get_train_augs(p=1.):
    return albu.Compose([
        #albu.HorizontalFlip(.5),
        albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=0.5 ),
        albu.Blur(blur_limit=3, p=0.3),
        albu.OpticalDistortion(p=0.3),
        albu.GaussNoise(p=0.3)
        #albu.GridDistortion(p=.33),
        #albu.HueSaturationValue(p=.33) # not for grey scale
    ], p=p)

In [10]:
#plt.imshow(x)

In [11]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

HEIGHT = 137
WIDTH = 236

class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            img = augs(image=img)['image']
        
        img = np.expand_dims(img, axis=-1)
        
        img = transforms.functional.to_tensor(img)
        #img = transforms.functional.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        
        if self.test_mode:
            return img
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic])

    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
    train_df = shuffle(train_df, random_state=1234)
    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [12]:
#train_loader, val_loader = get_train_val_loaders(dev_mode=True)

In [13]:
'''
for img, y in train_loader:
    print(img.size(), y.size())
    print(y)
    #print(img)
    #plt.imshow(img.squeeze()[0].numpy())
    break
'''

'\nfor img, y in train_loader:\n    print(img.size(), y.size())\n    print(y)\n    #print(img)\n    #plt.imshow(img.squeeze()[0].numpy())\n    break\n'

# model

In [14]:
#import pretrainedmodels

In [15]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [16]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [17]:
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained=False).cuda()


In [18]:
#model.features(torch.randn((2, 3, 137, 236)).cuda()).size()

In [19]:
#model.last_linear.in_features

In [20]:
class BengaliNet(nn.Module):
    def __init__(self, backbone_name):
        super(BengaliNet, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant
        
        self.conv0 = nn.Conv2d(1, 3, kernel_size=1, stride=1, padding=0)
        
        if self.backbone_name.startswith('efficient'):
            self.backbone = EfficientNet.from_pretrained(self.backbone_name)
            self.fc = nn.Linear(self.backbone._fc.in_features, self.num_classes)
        else:
            self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained='imagenet')
            self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
    def logits(self, x):
        x = self.avg_pool(x)
        x = F.dropout2d(x, 0.2, self.training)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
    def forward(self, x):
        x = self.conv0(x)
        #print(x.size())
        if self.backbone_name.startswith('efficient'):
            x = self.backbone.extract_features(x)
        else:
            x = self.backbone.features(x)
        x = self.logits(x)

        return x

In [21]:
MODEL_DIR = './models'
def create_model(args):
    model = BengaliNet(backbone_name=args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if args.predict and (not os.path.exists(model_file)):
        raise AttributeError('model file does not exist: {}'.format(model_file))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [22]:
'''
args = Namespace()
args.backbone = 'se_resnext50_32x4d'
args.ckp_name = 'best_model.pth'
args.predict = False

bnet = create_model(args)[0].cuda()
'''

"\nargs = Namespace()\nargs.backbone = 'se_resnext50_32x4d'\nargs.ckp_name = 'best_model.pth'\nargs.predict = False\n\nbnet = create_model(args)[0].cuda()\n"

In [23]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [24]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [25]:
round(1/9, 6)

0.111111

In [26]:
import numpy as np
import sklearn.metrics
import torch


def macro_recall(pred_y, y, n_grapheme=168, n_vowel=11, n_consonant=7):
    pred_y = torch.split(pred_y, [n_grapheme, n_vowel, n_consonant], dim=1)
    pred_labels = [torch.argmax(py, dim=1).cpu().numpy() for py in pred_y]

    #y = y.cpu().numpy()
    # pred_y = [p.cpu().numpy() for p in pred_y]

    recall_grapheme = sklearn.metrics.recall_score(pred_labels[0], y_grapheme, average='macro')
    recall_vowel = sklearn.metrics.recall_score(pred_labels[1], y_vowel, average='macro')
    recall_consonant = sklearn.metrics.recall_score(pred_labels[2], y_consonant, average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_score = np.average(scores, weights=[2, 1, 1])
    # print(f'recall: grapheme {recall_grapheme}, vowel {recall_vowel}, consonant {recall_consonant}, '
    #       f'total {final_score}, y {y.shape}')
    return final_score

def calc_metrics(preds0, preds1, preds2, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    
    
    return metrics

In [27]:
def accuracy(output, label, topk=(1,)):
    maxk = max(topk)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(label.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).sum().item()
        res.append(correct_k)
    return res

In [28]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    
    return loss0 + loss1 + loss2 #, loss0.item(), loss1.item(), loss2.item()

In [29]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2 = 0., 0., 0.
    preds0, preds1,preds2 = [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs = model(x)
            outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    
    return metrics
            

In [30]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [31]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [32]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [33]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [34]:
np.random.random()

0.36119972638146136

In [35]:
def train(args):
    global model

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    elif args.optim == 'RAdam':
        optimizer = RAdam(model.parameters(), lr=args.lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0001)

    if args.lrs == 'plateau':
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
        
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    best_metrics = 0.
    best_key = 'recall'
    
    val_metrics = validate(model, val_loader)
    print(val_metrics)
    best_metrics = val_metrics[best_key]
    
    model.train()
    #optimizer.zero_grad()

    #if args.lrs == 'plateau':
    #    lr_scheduler.step(best_metrics)
    #else:
    #    lr_scheduler.step()
    train_iter = 0

    for epoch in range(args.num_epochs):
        train_loss = 0

        current_lr = get_lrs(optimizer)
        bg = time.time()
        for batch_idx, (img, targets) in enumerate(train_loader):
            train_iter += 1
            img, targets  = img.cuda(), targets.cuda()
            #do_mixup = False #(np.random.random() < 0.4)
            
            #if do_mixup:
            #    img, targets = mixup(img, targets)
            batch_size = img.size(0)
          
            
            
            #if do_mixup:
            #    loss = mixup_criterion(outputs, targets)
            #else:
            #    loss = criterion(outputs, targets)
            r = np.random.rand()
            #if args.beta > 0 and r < args.cutmix_prob:
            if r < 0.5:
                # generate mixed sample
                lam = np.random.beta(args.beta, args.beta)
                rand_index = torch.randperm(img.size()[0]).cuda()
                target_a = targets
                target_b = targets[rand_index]
                bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
                img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
                # adjust lambda to exactly match pixel ratio
                lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
                # compute output
                outputs = model(img)
                loss = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            else:
                #img, targets = mixup(img, targets)
                outputs = model(img)
                #loss = mixup_criterion(outputs, targets)
                loss = criterion(outputs, targets)
            
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            
            #loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            
            #if batch_idx % 4 == 0:
            #    optimizer.step()
            #    optimizer.zero_grad()

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
                loss.item(), train_loss/(batch_idx+1)), end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                #outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
                #preds0 = (torch.max(outputs[0], dim=1)[1]).cpu().numpy()
                #preds1 = (torch.max(outputs[1], dim=1)[1]).cpu().numpy()
                #preds2 = (torch.max(outputs[2], dim=1)[1]).cpu().numpy()
                #train_metrics = calc_metrics(preds0, preds1, preds2, targets.cpu().numpy())
                #print('train:', train_metrics)
                #save_model(model, model_file+'_latest')
                val_metrics = validate(model, val_loader)
                print('\nval:', val_metrics)
                
                if val_metrics[best_key] > best_metrics:
                    best_metrics = val_metrics[best_key]
                    save_model(model, model_file)
                    print('** saved')
                
                model.train()
                
                if args.lrs == 'plateau':
                    lr_scheduler.step(best_metrics)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
        
    

In [36]:
args = Namespace()
args.backbone = 'efficientnet-b5'
args.ckp_name = 'best_model.pth'
args.predict = False
args.optim = 'RAdam'
args.lr = 2e-5
args.lrs = 'cosine'
args.t_max = 12
args.factor = 0.6
args.patience = 3
args.min_lr = 1e-6
args.iter_val = 200
args.num_epochs = 100000
args.batch_size = 512
args.val_batch_size = 1024

args.beta = 1.0
args.cutmix_prob = 0.5

In [37]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=1)

(200840, 5)
(200840, 32332)
(160635, 5) (40205, 5)


In [38]:
model, model_file = create_model(args)
#if torch.cuda.device_count() > 1:
#    model = nn.DataParallel(model)
model = model.cuda()


Loaded pretrained weights for efficientnet-b5
model file: ./models/efficientnet-b5/best_model.pth, exist: True
loading ./models/efficientnet-b5/best_model.pth...


In [None]:
train(args) 

{'recall': 0.980026, 'recall_grapheme': 0.97021, 'recall_vowel': 0.990607, 'recall_consonant': 0.989076, 'acc_grapheme': 0.970327, 'acc_vowel': 0.991867, 'acc_consonant': 0.99122, 'loss_grapheme': 0.132579, 'loss_vowel': 0.063698, 'loss_consonant': 0.053269}
    0 | 0.000020 | 102400/160635 | 1.4216 | 1.5926 |
val: {'recall': 0.979428, 'recall_grapheme': 0.969732, 'recall_vowel': 0.99021, 'recall_consonant': 0.988036, 'acc_grapheme': 0.970327, 'acc_vowel': 0.991668, 'acc_consonant': 0.991419, 'loss_grapheme': 0.123832, 'loss_vowel': 0.0538, 'loss_consonant': 0.046213}




    1 | 0.000020 | 044544/160635 | 0.0343 | 1.7214 |
val: {'recall': 0.978853, 'recall_grapheme': 0.969097, 'recall_vowel': 0.989925, 'recall_consonant': 0.987291, 'acc_grapheme': 0.970526, 'acc_vowel': 0.991668, 'acc_consonant': 0.991369, 'loss_grapheme': 0.125642, 'loss_vowel': 0.054729, 'loss_consonant': 0.046928}
    1 | 0.000019 | 146944/160635 | 0.0254 | 1.7357 |
val: {'recall': 0.979415, 'recall_grapheme': 0.969339, 'recall_vowel': 0.989935, 'recall_consonant': 0.98905, 'acc_grapheme': 0.970551, 'acc_vowel': 0.991668, 'acc_consonant': 0.99127, 'loss_grapheme': 0.129955, 'loss_vowel': 0.062203, 'loss_consonant': 0.051282}
    2 | 0.000017 | 089088/160635 | 4.6678 | 1.8437 |
val: {'recall': 0.980222, 'recall_grapheme': 0.970599, 'recall_vowel': 0.99002, 'recall_consonant': 0.989671, 'acc_grapheme': 0.970775, 'acc_vowel': 0.991668, 'acc_consonant': 0.990922, 'loss_grapheme': 0.145251, 'loss_vowel': 0.073869, 'loss_consonant': 0.061182}
** saved
    3 | 0.000015 | 031232/160635 | 0.

   17 | 0.000017 | 142848/160635 | 0.0232 | 1.6166 |
val: {'recall': 0.978898, 'recall_grapheme': 0.969306, 'recall_vowel': 0.988991, 'recall_consonant': 0.987989, 'acc_grapheme': 0.970501, 'acc_vowel': 0.991643, 'acc_consonant': 0.990996, 'loss_grapheme': 0.123555, 'loss_vowel': 0.050833, 'loss_consonant': 0.044785}
   18 | 0.000015 | 084992/160635 | 1.9702 | 1.7859 |
val: {'recall': 0.979288, 'recall_grapheme': 0.969932, 'recall_vowel': 0.990072, 'recall_consonant': 0.987215, 'acc_grapheme': 0.971073, 'acc_vowel': 0.991643, 'acc_consonant': 0.991245, 'loss_grapheme': 0.129075, 'loss_vowel': 0.061248, 'loss_consonant': 0.051546}
   19 | 0.000013 | 027136/160635 | 4.8899 | 1.9568 |
val: {'recall': 0.979606, 'recall_grapheme': 0.970542, 'recall_vowel': 0.989461, 'recall_consonant': 0.987878, 'acc_grapheme': 0.971372, 'acc_vowel': 0.991593, 'acc_consonant': 0.991145, 'loss_grapheme': 0.127272, 'loss_vowel': 0.05641, 'loss_consonant': 0.050583}
   19 | 0.000011 | 129536/160635 | 3.9684 | 

** saved
   34 | 0.000013 | 080896/160635 | 4.4634 | 1.6296 |
val: {'recall': 0.979439, 'recall_grapheme': 0.969324, 'recall_vowel': 0.990212, 'recall_consonant': 0.988896, 'acc_grapheme': 0.9707, 'acc_vowel': 0.991867, 'acc_consonant': 0.991344, 'loss_grapheme': 0.127543, 'loss_vowel': 0.056723, 'loss_consonant': 0.049874}
   35 | 0.000011 | 023040/160635 | 0.0159 | 0.8954 |
val: {'recall': 0.980082, 'recall_grapheme': 0.970736, 'recall_vowel': 0.990228, 'recall_consonant': 0.988629, 'acc_grapheme': 0.971297, 'acc_vowel': 0.992066, 'acc_consonant': 0.991518, 'loss_grapheme': 0.118208, 'loss_vowel': 0.048186, 'loss_consonant': 0.042618}
   35 | 0.000008 | 125440/160635 | 4.3193 | 1.5558 |
val: {'recall': 0.98018, 'recall_grapheme': 0.97096, 'recall_vowel': 0.990596, 'recall_consonant': 0.988203, 'acc_grapheme': 0.971645, 'acc_vowel': 0.992066, 'acc_consonant': 0.991444, 'loss_grapheme': 0.120369, 'loss_vowel': 0.052064, 'loss_consonant': 0.045449}
   36 | 0.000006 | 067584/160635 | 0.0

   51 | 0.000008 | 018944/160635 | 4.5109 | 1.5164 |
val: {'recall': 0.980208, 'recall_grapheme': 0.970824, 'recall_vowel': 0.990362, 'recall_consonant': 0.98882, 'acc_grapheme': 0.971869, 'acc_vowel': 0.991991, 'acc_consonant': 0.991494, 'loss_grapheme': 0.122238, 'loss_vowel': 0.056858, 'loss_consonant': 0.04733}
   51 | 0.000006 | 121344/160635 | 4.2205 | 1.7131 |
val: {'recall': 0.980451, 'recall_grapheme': 0.971231, 'recall_vowel': 0.990262, 'recall_consonant': 0.989078, 'acc_grapheme': 0.972018, 'acc_vowel': 0.99214, 'acc_consonant': 0.991643, 'loss_grapheme': 0.120716, 'loss_vowel': 0.054275, 'loss_consonant': 0.047123}
   52 | 0.000004 | 063488/160635 | 0.0196 | 1.3440 |
val: {'recall': 0.979845, 'recall_grapheme': 0.970467, 'recall_vowel': 0.990706, 'recall_consonant': 0.987741, 'acc_grapheme': 0.972193, 'acc_vowel': 0.992265, 'acc_consonant': 0.991717, 'loss_grapheme': 0.113592, 'loss_vowel': 0.046023, 'loss_consonant': 0.040173}
   53 | 0.000002 | 005632/160635 | 4.9897 | 1.

   67 | 0.000004 | 117248/160635 | 2.5359 | 1.7289 |
val: {'recall': 0.980698, 'recall_grapheme': 0.971658, 'recall_vowel': 0.990834, 'recall_consonant': 0.988643, 'acc_grapheme': 0.97274, 'acc_vowel': 0.992389, 'acc_consonant': 0.991593, 'loss_grapheme': 0.118685, 'loss_vowel': 0.052678, 'loss_consonant': 0.04529}
   68 | 0.000002 | 059392/160635 | 3.0239 | 1.9479 |
val: {'recall': 0.980653, 'recall_grapheme': 0.971558, 'recall_vowel': 0.990775, 'recall_consonant': 0.988722, 'acc_grapheme': 0.972765, 'acc_vowel': 0.99229, 'acc_consonant': 0.991543, 'loss_grapheme': 0.122499, 'loss_vowel': 0.057941, 'loss_consonant': 0.049276}
   69 | 0.000001 | 001536/160635 | 0.0231 | 1.2142 |
val: {'recall': 0.980585, 'recall_grapheme': 0.971565, 'recall_vowel': 0.990576, 'recall_consonant': 0.988634, 'acc_grapheme': 0.972939, 'acc_vowel': 0.992314, 'acc_consonant': 0.991543, 'loss_grapheme': 0.116843, 'loss_vowel': 0.051443, 'loss_consonant': 0.044679}
   69 | 0.000001 | 103936/160635 | 1.1283 | 1.

   84 | 0.000001 | 055296/160635 | 3.8815 | 1.8673 |
val: {'recall': 0.980875, 'recall_grapheme': 0.971626, 'recall_vowel': 0.990759, 'recall_consonant': 0.98949, 'acc_grapheme': 0.972789, 'acc_vowel': 0.992464, 'acc_consonant': 0.991916, 'loss_grapheme': 0.119631, 'loss_vowel': 0.055689, 'loss_consonant': 0.047219}
   84 | 0.000001 | 157696/160635 | 0.0120 | 1.5200 |
val: {'recall': 0.980784, 'recall_grapheme': 0.971435, 'recall_vowel': 0.990852, 'recall_consonant': 0.989412, 'acc_grapheme': 0.972814, 'acc_vowel': 0.992439, 'acc_consonant': 0.991892, 'loss_grapheme': 0.111761, 'loss_vowel': 0.04615, 'loss_consonant': 0.039822}
   85 | 0.000001 | 099840/160635 | 3.3682 | 1.3286 |
val: {'recall': 0.98053, 'recall_grapheme': 0.971524, 'recall_vowel': 0.990739, 'recall_consonant': 0.988332, 'acc_grapheme': 0.972864, 'acc_vowel': 0.992414, 'acc_consonant': 0.991817, 'loss_grapheme': 0.110627, 'loss_vowel': 0.044428, 'loss_consonant': 0.038691}
   86 | 0.000002 | 041984/160635 | 4.1985 | 1.

  100 | 0.000001 | 153600/160635 | 0.0179 | 1.4934 |
val: {'recall': 0.980618, 'recall_grapheme': 0.972174, 'recall_vowel': 0.990123, 'recall_consonant': 0.988, 'acc_grapheme': 0.973362, 'acc_vowel': 0.99214, 'acc_consonant': 0.991618, 'loss_grapheme': 0.108189, 'loss_vowel': 0.042693, 'loss_consonant': 0.037559}
  101 | 0.000002 | 095744/160635 | 3.1684 | 1.3000 |
val: {'recall': 0.980557, 'recall_grapheme': 0.972062, 'recall_vowel': 0.990217, 'recall_consonant': 0.987885, 'acc_grapheme': 0.973362, 'acc_vowel': 0.992314, 'acc_consonant': 0.991767, 'loss_grapheme': 0.109544, 'loss_vowel': 0.043751, 'loss_consonant': 0.038437}
  102 | 0.000004 | 037888/160635 | 0.0232 | 1.5840 |
val: {'recall': 0.980632, 'recall_grapheme': 0.971876, 'recall_vowel': 0.990335, 'recall_consonant': 0.988439, 'acc_grapheme': 0.973337, 'acc_vowel': 0.99229, 'acc_consonant': 0.991643, 'loss_grapheme': 0.111907, 'loss_vowel': 0.047088, 'loss_consonant': 0.041137}
  102 | 0.000006 | 140288/160635 | 0.0125 | 1.75

  117 | 0.000004 | 091648/160635 | 0.0078 | 1.3309 |
val: {'recall': 0.980901, 'recall_grapheme': 0.972229, 'recall_vowel': 0.990812, 'recall_consonant': 0.988336, 'acc_grapheme': 0.97366, 'acc_vowel': 0.992513, 'acc_consonant': 0.991767, 'loss_grapheme': 0.106444, 'loss_vowel': 0.041722, 'loss_consonant': 0.036412}
  118 | 0.000006 | 033792/160635 | 0.0128 | 1.6388 |
val: {'recall': 0.981212, 'recall_grapheme': 0.972761, 'recall_vowel': 0.990839, 'recall_consonant': 0.988489, 'acc_grapheme': 0.973536, 'acc_vowel': 0.992488, 'acc_consonant': 0.991817, 'loss_grapheme': 0.112221, 'loss_vowel': 0.047989, 'loss_consonant': 0.041165}
  118 | 0.000008 | 136192/160635 | 0.0110 | 1.5780 |
val: {'recall': 0.980909, 'recall_grapheme': 0.972353, 'recall_vowel': 0.99073, 'recall_consonant': 0.988202, 'acc_grapheme': 0.973685, 'acc_vowel': 0.992488, 'acc_consonant': 0.991916, 'loss_grapheme': 0.110169, 'loss_vowel': 0.045832, 'loss_consonant': 0.039647}
  119 | 0.000010 | 078336/160635 | 0.6342 | 1

  134 | 0.000008 | 029696/160635 | 2.2756 | 1.7758 |
val: {'recall': 0.981082, 'recall_grapheme': 0.972354, 'recall_vowel': 0.991327, 'recall_consonant': 0.988292, 'acc_grapheme': 0.974182, 'acc_vowel': 0.992613, 'acc_consonant': 0.991991, 'loss_grapheme': 0.112565, 'loss_vowel': 0.049069, 'loss_consonant': 0.042238}
  134 | 0.000011 | 132096/160635 | 3.4873 | 1.5254 |
val: {'recall': 0.98168, 'recall_grapheme': 0.97266, 'recall_vowel': 0.991128, 'recall_consonant': 0.990275, 'acc_grapheme': 0.973735, 'acc_vowel': 0.992538, 'acc_consonant': 0.99219, 'loss_grapheme': 0.112222, 'loss_vowel': 0.048133, 'loss_consonant': 0.041725}
  135 | 0.000013 | 074240/160635 | 2.7795 | 1.5630 |
val: {'recall': 0.981251, 'recall_grapheme': 0.972865, 'recall_vowel': 0.990845, 'recall_consonant': 0.988428, 'acc_grapheme': 0.973934, 'acc_vowel': 0.992339, 'acc_consonant': 0.99214, 'loss_grapheme': 0.110001, 'loss_vowel': 0.045562, 'loss_consonant': 0.039639}
  136 | 0.000015 | 016384/160635 | 0.0058 | 1.3

  150 | 0.000013 | 128000/160635 | 0.0150 | 1.5541 |
val: {'recall': 0.981937, 'recall_grapheme': 0.973063, 'recall_vowel': 0.991392, 'recall_consonant': 0.990229, 'acc_grapheme': 0.974332, 'acc_vowel': 0.992588, 'acc_consonant': 0.992165, 'loss_grapheme': 0.110053, 'loss_vowel': 0.045717, 'loss_consonant': 0.039054}
  151 | 0.000015 | 070144/160635 | 0.0066 | 1.6842 |
val: {'recall': 0.981275, 'recall_grapheme': 0.972765, 'recall_vowel': 0.990983, 'recall_consonant': 0.988586, 'acc_grapheme': 0.973884, 'acc_vowel': 0.992464, 'acc_consonant': 0.992265, 'loss_grapheme': 0.10977, 'loss_vowel': 0.046896, 'loss_consonant': 0.040131}
  152 | 0.000017 | 012288/160635 | 3.6739 | 1.2945 |
val: {'recall': 0.981226, 'recall_grapheme': 0.973124, 'recall_vowel': 0.990411, 'recall_consonant': 0.988243, 'acc_grapheme': 0.973834, 'acc_vowel': 0.992314, 'acc_consonant': 0.991916, 'loss_grapheme': 0.111941, 'loss_vowel': 0.045437, 'loss_consonant': 0.039176}
  152 | 0.000019 | 114688/160635 | 0.0106 | 

  167 | 0.000017 | 066048/160635 | 0.0137 | 1.7025 |
val: {'recall': 0.981609, 'recall_grapheme': 0.972657, 'recall_vowel': 0.991256, 'recall_consonant': 0.989867, 'acc_grapheme': 0.974406, 'acc_vowel': 0.992389, 'acc_consonant': 0.991892, 'loss_grapheme': 0.109996, 'loss_vowel': 0.047313, 'loss_consonant': 0.041982}
  168 | 0.000019 | 008192/160635 | 3.8557 | 2.3393 |
val: {'recall': 0.981583, 'recall_grapheme': 0.972423, 'recall_vowel': 0.990601, 'recall_consonant': 0.990884, 'acc_grapheme': 0.973934, 'acc_vowel': 0.992265, 'acc_consonant': 0.992041, 'loss_grapheme': 0.121634, 'loss_vowel': 0.055141, 'loss_consonant': 0.046839}
  168 | 0.000020 | 110592/160635 | 0.0077 | 1.7213 |
val: {'recall': 0.98183, 'recall_grapheme': 0.972978, 'recall_vowel': 0.990772, 'recall_consonant': 0.990591, 'acc_grapheme': 0.974008, 'acc_vowel': 0.992439, 'acc_consonant': 0.992016, 'loss_grapheme': 0.115241, 'loss_vowel': 0.051163, 'loss_consonant': 0.044902}
  169 | 0.000020 | 052736/160635 | 3.3491 | 

  184 | 0.000020 | 004096/160635 | 0.0081 | 1.3402 |
val: {'recall': 0.981692, 'recall_grapheme': 0.972997, 'recall_vowel': 0.990519, 'recall_consonant': 0.990257, 'acc_grapheme': 0.973486, 'acc_vowel': 0.992488, 'acc_consonant': 0.992016, 'loss_grapheme': 0.111887, 'loss_vowel': 0.045742, 'loss_consonant': 0.040582}
  184 | 0.000020 | 106496/160635 | 0.0093 | 1.5738 |
val: {'recall': 0.981323, 'recall_grapheme': 0.97232, 'recall_vowel': 0.991046, 'recall_consonant': 0.989604, 'acc_grapheme': 0.973436, 'acc_vowel': 0.992165, 'acc_consonant': 0.991892, 'loss_grapheme': 0.112551, 'loss_vowel': 0.048577, 'loss_consonant': 0.043324}
  185 | 0.000020 | 048640/160635 | 0.0152 | 1.7806 |
val: {'recall': 0.98159, 'recall_grapheme': 0.972686, 'recall_vowel': 0.990576, 'recall_consonant': 0.990413, 'acc_grapheme': 0.974257, 'acc_vowel': 0.992339, 'acc_consonant': 0.992091, 'loss_grapheme': 0.11141, 'loss_vowel': 0.0487, 'loss_consonant': 0.041596}
  185 | 0.000019 | 151040/160635 | 4.7143 | 1.61

  200 | 0.000020 | 102400/160635 | 0.0191 | 1.3510 |
val: {'recall': 0.982598, 'recall_grapheme': 0.975037, 'recall_vowel': 0.991331, 'recall_consonant': 0.988989, 'acc_grapheme': 0.975376, 'acc_vowel': 0.992687, 'acc_consonant': 0.992265, 'loss_grapheme': 0.10434, 'loss_vowel': 0.039555, 'loss_consonant': 0.035841}
  201 | 0.000019 | 044544/160635 | 0.0260 | 1.7899 |
val: {'recall': 0.981514, 'recall_grapheme': 0.973313, 'recall_vowel': 0.990558, 'recall_consonant': 0.988874, 'acc_grapheme': 0.974356, 'acc_vowel': 0.992439, 'acc_consonant': 0.992215, 'loss_grapheme': 0.10626, 'loss_vowel': 0.044002, 'loss_consonant': 0.039991}
  201 | 0.000017 | 146944/160635 | 2.6176 | 1.7036 |
val: {'recall': 0.981924, 'recall_grapheme': 0.974048, 'recall_vowel': 0.991024, 'recall_consonant': 0.988577, 'acc_grapheme': 0.974978, 'acc_vowel': 0.992588, 'acc_consonant': 0.992488, 'loss_grapheme': 0.108338, 'loss_vowel': 0.047723, 'loss_consonant': 0.041182}
  202 | 0.000015 | 089088/160635 | 0.0084 | 1

  217 | 0.000017 | 040448/160635 | 0.9370 | 1.4708 |
val: {'recall': 0.981933, 'recall_grapheme': 0.973888, 'recall_vowel': 0.991377, 'recall_consonant': 0.98858, 'acc_grapheme': 0.974879, 'acc_vowel': 0.992638, 'acc_consonant': 0.992389, 'loss_grapheme': 0.105564, 'loss_vowel': 0.044916, 'loss_consonant': 0.039517}
  217 | 0.000015 | 142848/160635 | 0.0036 | 1.5133 |
val: {'recall': 0.982295, 'recall_grapheme': 0.975037, 'recall_vowel': 0.991254, 'recall_consonant': 0.987852, 'acc_grapheme': 0.975426, 'acc_vowel': 0.992563, 'acc_consonant': 0.992314, 'loss_grapheme': 0.103815, 'loss_vowel': 0.043973, 'loss_consonant': 0.037433}
  218 | 0.000013 | 084992/160635 | 1.9744 | 1.6019 |
val: {'recall': 0.98268, 'recall_grapheme': 0.975112, 'recall_vowel': 0.991298, 'recall_consonant': 0.989198, 'acc_grapheme': 0.975749, 'acc_vowel': 0.992737, 'acc_consonant': 0.992314, 'loss_grapheme': 0.104869, 'loss_vowel': 0.046332, 'loss_consonant': 0.039256}
  219 | 0.000011 | 027136/160635 | 2.0712 | 1

  233 | 0.000013 | 138752/160635 | 0.0054 | 1.4936 |
val: {'recall': 0.982683, 'recall_grapheme': 0.975445, 'recall_vowel': 0.991516, 'recall_consonant': 0.988324, 'acc_grapheme': 0.976421, 'acc_vowel': 0.992862, 'acc_consonant': 0.992563, 'loss_grapheme': 0.101429, 'loss_vowel': 0.043679, 'loss_consonant': 0.037587}
  234 | 0.000011 | 080896/160635 | 0.0063 | 1.4816 |
val: {'recall': 0.982929, 'recall_grapheme': 0.976193, 'recall_vowel': 0.991511, 'recall_consonant': 0.987821, 'acc_grapheme': 0.976645, 'acc_vowel': 0.993011, 'acc_consonant': 0.992265, 'loss_grapheme': 0.10027, 'loss_vowel': 0.041647, 'loss_consonant': 0.036315}
** saved
  235 | 0.000008 | 023040/160635 | 0.0149 | 1.4852 |
val: {'recall': 0.98258, 'recall_grapheme': 0.974906, 'recall_vowel': 0.991838, 'recall_consonant': 0.988671, 'acc_grapheme': 0.975824, 'acc_vowel': 0.99311, 'acc_consonant': 0.992389, 'loss_grapheme': 0.102224, 'loss_vowel': 0.044242, 'loss_consonant': 0.039015}
  235 | 0.000006 | 125440/160635 | 1.

  250 | 0.000008 | 076800/160635 | 0.0096 | 1.5870 |
val: {'recall': 0.982752, 'recall_grapheme': 0.975854, 'recall_vowel': 0.991829, 'recall_consonant': 0.98747, 'acc_grapheme': 0.976471, 'acc_vowel': 0.992936, 'acc_consonant': 0.99219, 'loss_grapheme': 0.103531, 'loss_vowel': 0.04737, 'loss_consonant': 0.040136}
  251 | 0.000006 | 018944/160635 | 0.0075 | 1.5341 |
val: {'recall': 0.982378, 'recall_grapheme': 0.975172, 'recall_vowel': 0.991565, 'recall_consonant': 0.987602, 'acc_grapheme': 0.976222, 'acc_vowel': 0.992663, 'acc_consonant': 0.99224, 'loss_grapheme': 0.100366, 'loss_vowel': 0.042755, 'loss_consonant': 0.036624}
  251 | 0.000004 | 121344/160635 | 0.0020 | 1.4994 |
val: {'recall': 0.982429, 'recall_grapheme': 0.975253, 'recall_vowel': 0.991836, 'recall_consonant': 0.987376, 'acc_grapheme': 0.976073, 'acc_vowel': 0.993011, 'acc_consonant': 0.99224, 'loss_grapheme': 0.100909, 'loss_vowel': 0.044397, 'loss_consonant': 0.037886}
  252 | 0.000002 | 063488/160635 | 2.3039 | 1.78

  267 | 0.000002 | 117248/160635 | 0.0036 | 1.5686 |
val: {'recall': 0.983411, 'recall_grapheme': 0.976256, 'recall_vowel': 0.991908, 'recall_consonant': 0.989225, 'acc_grapheme': 0.977267, 'acc_vowel': 0.99316, 'acc_consonant': 0.992737, 'loss_grapheme': 0.099384, 'loss_vowel': 0.043489, 'loss_consonant': 0.037253}
  268 | 0.000001 | 059392/160635 | 3.4335 | 1.2917 |
val: {'recall': 0.983488, 'recall_grapheme': 0.976608, 'recall_vowel': 0.991655, 'recall_consonant': 0.989082, 'acc_grapheme': 0.977068, 'acc_vowel': 0.993011, 'acc_consonant': 0.992687, 'loss_grapheme': 0.098433, 'loss_vowel': 0.04109, 'loss_consonant': 0.034987}
  269 | 0.000001 | 001536/160635 | 4.6920 | 2.9598 |
val: {'recall': 0.983355, 'recall_grapheme': 0.976281, 'recall_vowel': 0.991823, 'recall_consonant': 0.989036, 'acc_grapheme': 0.976893, 'acc_vowel': 0.993036, 'acc_consonant': 0.992663, 'loss_grapheme': 0.100298, 'loss_vowel': 0.044452, 'loss_consonant': 0.037617}
  269 | 0.000001 | 096256/160635 | 0.0035 | 1

In [None]:
#save_model(model, model_file)

In [None]:
del model