In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet

In [2]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [3]:
#!ls /home/chec/data/bengali

In [4]:
DATA_DIR = '/home/chec/data/bengali'

In [5]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [7]:
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
import pdb

class Grid(object):
    def __init__(self, d1, d2, rotate = 1, ratio = 0.5, mode=0, prob=1.):
        self.d1 = d1
        self.d2 = d2
        self.rotate = rotate
        self.ratio = ratio
        self.mode=mode
        self.st_prob = self.prob = prob

    def set_prob(self, epoch, max_epoch):
        self.prob = self.st_prob * min(1, epoch / max_epoch)

    def __call__(self, img):
        if np.random.rand() > self.prob:
            return img
        h = img.size(1)
        w = img.size(2)
        hh = int(1.5*h)
        ww = int(1.5*w)
        d = np.random.randint(self.d1, self.d2)
        #d = self.d
        self.l = int(d*self.ratio+0.5)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        for i in range(-1, hh//d+1):
                s = d*i + st_h
                t = s+self.l
                s = max(min(s, hh), 0)
                t = max(min(t, hh), 0)
                mask[s:t,:] *= 0
        for i in range(-1, ww//d+1):
                s = d*i + st_w
                t = s+self.l
                s = max(min(s, ww), 0)
                t = max(min(t, ww), 0)
                mask[:,s:t] *= 0
        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]

        mask = torch.from_numpy(mask).float().cuda()
        if self.mode == 1:
            mask = 1-mask

        mask = mask.expand_as(img)
        img = img * mask 

        return img

class GridMask(nn.Module):
    def __init__(self, d1, d2, rotate = 1, ratio = 0.6, mode=1, prob=1.):
        super(GridMask, self).__init__()
        self.rotate = rotate
        self.ratio = ratio
        self.mode = mode
        self.st_prob = prob
        self.grid = Grid(d1, d2, rotate, ratio, mode, prob)

    def set_prob(self, epoch, max_epoch):
        self.grid.set_prob(epoch, max_epoch)

    def forward(self, x):
        if not self.training:
            return x
        n,c,h,w = x.size()
        y = []
        for i in range(n):
            y.append(self.grid(x[i]))
        y = torch.cat(y).view(n,c,h,w)
        return y


In [8]:
HEIGHT = 137
WIDTH = 236

In [9]:
import albumentations as albu

def get_train_augs(p=1.):
    return albu.Compose([
        #albu.HorizontalFlip(.5),
        albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=0.5 ),
        albu.Blur(blur_limit=3, p=0.3),
        albu.OpticalDistortion(p=0.3),
        albu.GaussNoise(p=0.3)
        #albu.GridDistortion(p=.33),
        #albu.HueSaturationValue(p=.33) # not for grey scale
    ], p=p)

In [10]:
#plt.imshow(x)

In [11]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        #print(img.shape)
        #if self.train_mode:
        #    augs = get_train_augs()
        #    img = augs(image=img)['image']
        
        img = np.expand_dims(img, axis=-1)
        #print('###', img.shape)
        #img = np.concatenate([img, img, img], 2)
        #print('>>>', img.shape)
        
        # taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
        #MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
        #STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]
        
        img = transforms.functional.to_tensor(img)
        #img = transforms.functional.normalize(img, mean=MEAN, std=STD)
        
        if self.test_mode:
            return img
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic])

    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
    train_df = shuffle(train_df, random_state=1234)
    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [12]:
#train_loader, val_loader = get_train_val_loaders(dev_mode=True)

# model

In [13]:
#import pretrainedmodels

In [14]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [15]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [16]:
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained=False).cuda()


In [17]:
#model.features(torch.randn((2, 3, 137, 236)).cuda()).size()

In [18]:
#model.last_linear.in_features

In [19]:
MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

class BengaliNet(nn.Module):
    def __init__(self, backbone_name):
        super(BengaliNet, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant
        
        #self.conv0 = nn.Conv2d(1, 3, kernel_size=1, stride=1, padding=0)
        
        if self.backbone_name.startswith('efficient'):
            self.backbone = EfficientNet.from_pretrained(self.backbone_name)
            self.fc = nn.Linear(self.backbone._fc.in_features, self.num_classes)
        else:
            self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained='imagenet')
            self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
        #self.fix_input_layer()
        
    def fix_input_layer(self):
        if self.backbone_name in ['se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnet50', 'senet154', 'se_resnet152', 'nasnetmobile', 'mobilenet', 'nasnetalarge']:
            #self.backbone = eval(backbone_name)()
            #print(self.backbone.layer0.conv1)
            w = self.backbone.layer0.conv1.weight.data
            self.backbone.layer0.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
            #self.backbone.layer0.conv1.weight = torch.nn.Parameter(torch.cat((w, w[:, 2, :, :].unsqueeze(1)), dim=1))
            self.backbone.layer0.conv1.weight = torch.nn.Parameter(w[:, 0, :, :].unsqueeze(1))
        
    def logits(self, x):
        x = self.avg_pool(x)
        #x = F.dropout2d(x, 0.2, self.training)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
    def forward(self, x):
        x = F.interpolate(x, size=(224,224), mode='bilinear', align_corners=False)
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)
        x = torch.cat([x,x,x], 1)
        #x = self.conv0(x)
        #print(x.size())
        if self.backbone_name.startswith('efficient'):
            x = self.backbone.extract_features(x)
        else:
            x = self.backbone.features(x)
        x = self.logits(x)

        return x

In [20]:
MODEL_DIR = './models'
def create_model(args):
    model = BengaliNet(backbone_name=args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if args.predict and (not os.path.exists(model_file)):
        raise AttributeError('model file does not exist: {}'.format(model_file))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [21]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [22]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [23]:
round(1/9, 6)

0.111111

In [24]:
import numpy as np
import sklearn.metrics
import torch


def macro_recall(pred_y, y, n_grapheme=168, n_vowel=11, n_consonant=7):
    pred_y = torch.split(pred_y, [n_grapheme, n_vowel, n_consonant], dim=1)
    pred_labels = [torch.argmax(py, dim=1).cpu().numpy() for py in pred_y]

    #y = y.cpu().numpy()
    # pred_y = [p.cpu().numpy() for p in pred_y]

    recall_grapheme = sklearn.metrics.recall_score(pred_labels[0], y_grapheme, average='macro')
    recall_vowel = sklearn.metrics.recall_score(pred_labels[1], y_vowel, average='macro')
    recall_consonant = sklearn.metrics.recall_score(pred_labels[2], y_consonant, average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_score = np.average(scores, weights=[2, 1, 1])
    # print(f'recall: grapheme {recall_grapheme}, vowel {recall_vowel}, consonant {recall_consonant}, '
    #       f'total {final_score}, y {y.shape}')
    return final_score

def calc_metrics(preds0, preds1, preds2, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    
    
    return metrics

In [51]:
OHEM = False
def ohem_loss(cls_pred, cls_target, rate=0.5):
    batch_size = cls_pred.size(0) 
    ohem_cls_loss = F.cross_entropy(cls_pred, cls_target, reduction='none', ignore_index=-1)

    sorted_ohem_loss, idx = torch.sort(ohem_cls_loss, descending=True)
    keep_num = min(sorted_ohem_loss.size()[0], int(batch_size*rate) )
    if keep_num < sorted_ohem_loss.size()[0]:
        keep_idx_cuda = idx[:keep_num]
        ohem_cls_loss = ohem_cls_loss[keep_idx_cuda]
    cls_loss = ohem_cls_loss.sum() / keep_num
    return cls_loss

In [52]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7], dim=1)
    if OHEM:
        loss0 = ohem_loss(outputs[0], y_true[:, 0])
        loss1 = ohem_loss(outputs[1], y_true[:, 1])
        loss2 = ohem_loss(outputs[2], y_true[:, 2])
    else:
        loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
        loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
        loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    
    return loss0 + loss1 + loss2 #, loss0.item(), loss1.item(), loss2.item()

In [53]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2 = 0., 0., 0.
    preds0, preds1,preds2 = [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs = model(x)
            outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    
    return metrics
            

In [28]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [29]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [30]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [31]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [32]:
np.random.random()

0.3102954855210832

In [33]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam
from over9000.lookahead import Lookahead
def LookaheadSGD(params, alpha=0.5, k=6, *args, **kwargs):
     sgd = optim.SGD(params, *args, **kwargs)
     return Lookahead(sgd, alpha, k)

In [54]:
def train(args):
    global model

    if args.optim == 'Adam':
        optimizer = optim.Adam([{'params': model.parameters(), 'initial_lr': args.lr }], lr=args.lr, weight_decay=1e-5)
        #optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
    elif args.optim == 'RAdam':
        optimizer = RAdam(model.parameters(), lr=args.lr)
    elif args.optim == 'Over9000':
        optimizer = Over9000(model.parameters(), lr=args.lr)
    else:
        optimizer = LookaheadSGD(
            [{'params': model.parameters(), 'initial_lr': args.lr }],
            lr=args.lr, momentum=0.9, weight_decay=1e-5)

    if args.lrs == 'plateau':
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        #lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.t_max, eta_min=args.min_lr, last_epoch=args.t_max)
        
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    best_metrics = 0.
    best_key = 'recall'
    
    val_metrics = validate(model, val_loader)
    print(val_metrics)
    best_metrics = val_metrics[best_key]
    
    model.train()
    #optimizer.zero_grad()

    #if args.lrs == 'plateau':
    #    lr_scheduler.step(best_metrics)
    #else:
    #    lr_scheduler.step()
    train_iter = 0
    
    grid = GridMask(64, 128, rotate=15, ratio=0.6, mode=1, prob=1.)

    for epoch in range(args.num_epochs):

        grid.set_prob(epoch, args.st_epochs)

        train_loss = 0

        current_lr = get_lrs(optimizer)
        bg = time.time()
        for batch_idx, (img, targets) in enumerate(train_loader):
            train_iter += 1
            img, targets  = img.cuda(), targets.cuda()
            #do_mixup = False #(np.random.random() < 0.4)
            
            #if do_mixup:
            #    img, targets = mixup(img, targets)
            batch_size = img.size(0)
          
            
            
            #if do_mixup:
            #    loss = mixup_criterion(outputs, targets)
            #else:
            #    loss = criterion(outputs, targets)
            r = np.random.rand()
            #if args.beta > 0 and r < args.cutmix_prob:
            if r < 0.3:
                # generate mixed sample
                lam = np.random.beta(args.beta, args.beta)
                rand_index = torch.randperm(img.size()[0]).cuda()
                target_a = targets
                target_b = targets[rand_index]
                bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
                img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
                # adjust lambda to exactly match pixel ratio
                lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
                # compute output
                outputs = model(img)
                loss = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            elif r > 0.6: # grid mask
                img = grid(img)
                outputs = model(img)
                loss = criterion(outputs, targets)
            else:
                img, targets = mixup(img, targets)
                outputs = model(img)
                loss = mixup_criterion(outputs, targets)
                #loss = criterion(outputs, targets)
            
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            
            #loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            
            #if batch_idx % 4 == 0:
            #    optimizer.step()
            #    optimizer.zero_grad()

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
                loss.item(), train_loss/(batch_idx+1)), end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                #outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
                #preds0 = (torch.max(outputs[0], dim=1)[1]).cpu().numpy()
                #preds1 = (torch.max(outputs[1], dim=1)[1]).cpu().numpy()
                #preds2 = (torch.max(outputs[2], dim=1)[1]).cpu().numpy()
                #train_metrics = calc_metrics(preds0, preds1, preds2, targets.cpu().numpy())
                #print('train:', train_metrics)
                #save_model(model, model_file+'_latest')
                val_metrics = validate(model, val_loader)
                print('\nval:', val_metrics)
                
                if val_metrics[best_key] > best_metrics:
                    best_metrics = val_metrics[best_key]
                    save_model(model, model_file)
                    print('** saved')
                
                model.train()
                
                if args.lrs == 'plateau':
                    lr_scheduler.step(best_metrics)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
        
    

In [55]:
args = Namespace()
args.backbone = 'se_resnext50_32x4d'
args.ckp_name = 'model3_se_resnext50_fold1_mixup_cutmix_224_gridmask.pth'
args.predict = False
args.optim = 'Adam'
args.lr = 1e-4
args.lrs = 'cosine'
args.t_max = 12
args.factor = 0.5
args.patience = 5
args.min_lr = 1e-6
args.iter_val = 200
args.num_epochs = 100000
args.batch_size = 768
args.val_batch_size = 1024
args.st_epochs = 1

args.beta = 1.0
args.cutmix_prob = 0.5

In [47]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=1)

(200840, 5)
(200840, 32332)
(160635, 5) (40205, 5)


In [56]:
model, model_file = create_model(args)
#if torch.cuda.device_count() > 1:
#    model = nn.DataParallel(model)
model = model.cuda()


model file: ./models/se_resnext50_32x4d/model3_se_resnext50_fold1_mixup_cutmix_224_gridmask.pth, exist: True
loading ./models/se_resnext50_32x4d/model3_se_resnext50_fold1_mixup_cutmix_224_gridmask.pth...


In [None]:
train(args)

{'recall': 0.996536, 'recall_grapheme': 0.995104, 'recall_vowel': 0.998147, 'recall_consonant': 0.997787, 'acc_grapheme': 0.994702, 'acc_vowel': 0.998085, 'acc_consonant': 0.998508, 'loss_grapheme': 0.026128, 'loss_vowel': 0.015355, 'loss_consonant': 0.010569}
    0 | 0.000001 | 153600/160635 | 1.9604 | 0.9943 |
val: {'recall': 0.995378, 'recall_grapheme': 0.993289, 'recall_vowel': 0.997177, 'recall_consonant': 0.997756, 'acc_grapheme': 0.993334, 'acc_vowel': 0.997388, 'acc_consonant': 0.998085, 'loss_grapheme': 0.050876, 'loss_vowel': 0.039911, 'loss_consonant': 0.027875}




    1 | 0.000003 | 146688/160635 | 2.8515 | 0.9768 |
val: {'recall': 0.996071, 'recall_grapheme': 0.994393, 'recall_vowel': 0.997736, 'recall_consonant': 0.997763, 'acc_grapheme': 0.994155, 'acc_vowel': 0.997811, 'acc_consonant': 0.998159, 'loss_grapheme': 0.038061, 'loss_vowel': 0.029167, 'loss_consonant': 0.019082}
    2 | 0.000008 | 139776/160635 | 0.0052 | 1.0124 |
val: {'recall': 0.995672, 'recall_grapheme': 0.9939, 'recall_vowel': 0.997243, 'recall_consonant': 0.997645, 'acc_grapheme': 0.993633, 'acc_vowel': 0.997463, 'acc_consonant': 0.998135, 'loss_grapheme': 0.038652, 'loss_vowel': 0.028081, 'loss_consonant': 0.018674}
    3 | 0.000015 | 132096/160635 | 2.0825 | 1.1681 |

In [50]:
del model

In [38]:
train(args)

{'recall': 0.994437, 'recall_grapheme': 0.99255, 'recall_vowel': 0.996113, 'recall_consonant': 0.996534, 'acc_grapheme': 0.991643, 'acc_vowel': 0.996543, 'acc_consonant': 0.997463, 'loss_grapheme': 0.042325, 'loss_vowel': 0.026621, 'loss_consonant': 0.019315}
    0 | 0.000100 | 153600/160635 | 0.0068 | 1.2476 |
val: {'recall': 0.991259, 'recall_grapheme': 0.987884, 'recall_vowel': 0.994509, 'recall_consonant': 0.994759, 'acc_grapheme': 0.987041, 'acc_vowel': 0.9952, 'acc_consonant': 0.99617, 'loss_grapheme': 0.083335, 'loss_vowel': 0.048971, 'loss_consonant': 0.035815}




    1 | 0.000098 | 146688/160635 | 1.3256 | 1.2112 |
val: {'recall': 0.989781, 'recall_grapheme': 0.986392, 'recall_vowel': 0.99313, 'recall_consonant': 0.99321, 'acc_grapheme': 0.985698, 'acc_vowel': 0.994254, 'acc_consonant': 0.9951, 'loss_grapheme': 0.152745, 'loss_vowel': 0.106245, 'loss_consonant': 0.070814}
    2 | 0.000093 | 139776/160635 | 0.0615 | 1.3192 |
val: {'recall': 0.991795, 'recall_grapheme': 0.988969, 'recall_vowel': 0.993523, 'recall_consonant': 0.995718, 'acc_grapheme': 0.988111, 'acc_vowel': 0.99515, 'acc_consonant': 0.995971, 'loss_grapheme': 0.088764, 'loss_vowel': 0.067296, 'loss_consonant': 0.049089}
    3 | 0.000086 | 132864/160635 | 2.6673 | 1.2214 |
val: {'recall': 0.992205, 'recall_grapheme': 0.989505, 'recall_vowel': 0.994963, 'recall_consonant': 0.994849, 'acc_grapheme': 0.988907, 'acc_vowel': 0.995274, 'acc_consonant': 0.996443, 'loss_grapheme': 0.070851, 'loss_vowel': 0.051897, 'loss_consonant': 0.035}
    4 | 0.000075 | 125952/160635 | 1.3082 | 1.2637 

   26 | 0.000086 | 127488/160635 | 3.3113 | 1.2203 |
val: {'recall': 0.99167, 'recall_grapheme': 0.988377, 'recall_vowel': 0.994638, 'recall_consonant': 0.995286, 'acc_grapheme': 0.98821, 'acc_vowel': 0.995299, 'acc_consonant': 0.996244, 'loss_grapheme': 0.110479, 'loss_vowel': 0.095256, 'loss_consonant': 0.063847}
   27 | 0.000075 | 120576/160635 | 0.0295 | 1.3093 |
val: {'recall': 0.992301, 'recall_grapheme': 0.990159, 'recall_vowel': 0.995186, 'recall_consonant': 0.993699, 'acc_grapheme': 0.989529, 'acc_vowel': 0.995722, 'acc_consonant': 0.996717, 'loss_grapheme': 0.082857, 'loss_vowel': 0.072849, 'loss_consonant': 0.043882}
   28 | 0.000063 | 113664/160635 | 1.9727 | 1.0804 |
val: {'recall': 0.993166, 'recall_grapheme': 0.991169, 'recall_vowel': 0.994707, 'recall_consonant': 0.995617, 'acc_grapheme': 0.990847, 'acc_vowel': 0.995896, 'acc_consonant': 0.996468, 'loss_grapheme': 0.064883, 'loss_vowel': 0.049599, 'loss_consonant': 0.032962}
   29 | 0.000051 | 106752/160635 | 1.6526 | 1

   51 | 0.000063 | 108288/160635 | 2.0852 | 1.2369 |
val: {'recall': 0.993453, 'recall_grapheme': 0.991519, 'recall_vowel': 0.995962, 'recall_consonant': 0.994811, 'acc_grapheme': 0.99117, 'acc_vowel': 0.996592, 'acc_consonant': 0.996941, 'loss_grapheme': 0.07984, 'loss_vowel': 0.068836, 'loss_consonant': 0.047099}
   52 | 0.000051 | 101376/160635 | 2.4363 | 1.1049 |
val: {'recall': 0.994352, 'recall_grapheme': 0.99271, 'recall_vowel': 0.996683, 'recall_consonant': 0.995305, 'acc_grapheme': 0.992066, 'acc_vowel': 0.996667, 'acc_consonant': 0.997214, 'loss_grapheme': 0.060999, 'loss_vowel': 0.049249, 'loss_consonant': 0.035083}
   53 | 0.000038 | 094464/160635 | 1.6521 | 1.1654 |
val: {'recall': 0.994866, 'recall_grapheme': 0.993003, 'recall_vowel': 0.996593, 'recall_consonant': 0.996867, 'acc_grapheme': 0.992488, 'acc_vowel': 0.996941, 'acc_consonant': 0.997687, 'loss_grapheme': 0.061912, 'loss_vowel': 0.050704, 'loss_consonant': 0.034829}
** saved
   54 | 0.000026 | 087552/160635 | 0.

   76 | 0.000038 | 089088/160635 | 0.8051 | 1.0865 |
val: {'recall': 0.994123, 'recall_grapheme': 0.992238, 'recall_vowel': 0.995519, 'recall_consonant': 0.996495, 'acc_grapheme': 0.991419, 'acc_vowel': 0.996468, 'acc_consonant': 0.997289, 'loss_grapheme': 0.084627, 'loss_vowel': 0.076334, 'loss_consonant': 0.050077}
   77 | 0.000026 | 082176/160635 | 1.9400 | 1.0239 |
val: {'recall': 0.994848, 'recall_grapheme': 0.993567, 'recall_vowel': 0.996704, 'recall_consonant': 0.995556, 'acc_grapheme': 0.993135, 'acc_vowel': 0.997189, 'acc_consonant': 0.997438, 'loss_grapheme': 0.040753, 'loss_vowel': 0.029628, 'loss_consonant': 0.020642}
   78 | 0.000015 | 075264/160635 | 0.0087 | 1.1218 |
val: {'recall': 0.994816, 'recall_grapheme': 0.992987, 'recall_vowel': 0.996701, 'recall_consonant': 0.996589, 'acc_grapheme': 0.992812, 'acc_vowel': 0.997214, 'acc_consonant': 0.997513, 'loss_grapheme': 0.052885, 'loss_vowel': 0.043829, 'loss_consonant': 0.029879}
   79 | 0.000008 | 068352/160635 | 0.0042 |

  101 | 0.000015 | 069888/160635 | 1.8625 | 1.1612 |
val: {'recall': 0.994579, 'recall_grapheme': 0.992837, 'recall_vowel': 0.996571, 'recall_consonant': 0.996073, 'acc_grapheme': 0.99224, 'acc_vowel': 0.997214, 'acc_consonant': 0.997463, 'loss_grapheme': 0.06383, 'loss_vowel': 0.056826, 'loss_consonant': 0.038237}
  102 | 0.000008 | 062976/160635 | 0.7255 | 1.2493 |
val: {'recall': 0.994936, 'recall_grapheme': 0.993274, 'recall_vowel': 0.996968, 'recall_consonant': 0.996228, 'acc_grapheme': 0.992787, 'acc_vowel': 0.997488, 'acc_consonant': 0.997786, 'loss_grapheme': 0.04378, 'loss_vowel': 0.036432, 'loss_consonant': 0.024064}
  103 | 0.000003 | 056064/160635 | 2.5527 | 1.1420 |
val: {'recall': 0.994483, 'recall_grapheme': 0.992412, 'recall_vowel': 0.996446, 'recall_consonant': 0.996661, 'acc_grapheme': 0.991916, 'acc_vowel': 0.997289, 'acc_consonant': 0.997712, 'loss_grapheme': 0.056261, 'loss_vowel': 0.053454, 'loss_consonant': 0.035324}
  104 | 0.000001 | 049152/160635 | 0.0052 | 1.

  126 | 0.000003 | 050688/160635 | 0.0189 | 0.8395 |
val: {'recall': 0.996327, 'recall_grapheme': 0.994808, 'recall_vowel': 0.997596, 'recall_consonant': 0.998096, 'acc_grapheme': 0.994628, 'acc_vowel': 0.997886, 'acc_consonant': 0.998383, 'loss_grapheme': 0.025142, 'loss_vowel': 0.013603, 'loss_consonant': 0.009033}
** saved
  127 | 0.000001 | 043776/160635 | 2.9822 | 1.1725 |
val: {'recall': 0.995387, 'recall_grapheme': 0.993219, 'recall_vowel': 0.996952, 'recall_consonant': 0.998159, 'acc_grapheme': 0.992886, 'acc_vowel': 0.997388, 'acc_consonant': 0.998135, 'loss_grapheme': 0.04732, 'loss_vowel': 0.039326, 'loss_consonant': 0.025731}
  128 | 0.000003 | 036864/160635 | 0.0053 | 1.0314 |
val: {'recall': 0.99531, 'recall_grapheme': 0.993278, 'recall_vowel': 0.996695, 'recall_consonant': 0.99799, 'acc_grapheme': 0.992961, 'acc_vowel': 0.997388, 'acc_consonant': 0.998159, 'loss_grapheme': 0.03769, 'loss_vowel': 0.02793, 'loss_consonant': 0.019143}
  129 | 0.000008 | 029952/160635 | 0.00

  151 | 0.000003 | 031488/160635 | 0.0031 | 0.9315 |
val: {'recall': 0.995941, 'recall_grapheme': 0.994214, 'recall_vowel': 0.997565, 'recall_consonant': 0.997771, 'acc_grapheme': 0.993981, 'acc_vowel': 0.997737, 'acc_consonant': 0.99806, 'loss_grapheme': 0.033601, 'loss_vowel': 0.023653, 'loss_consonant': 0.015669}
  152 | 0.000008 | 024576/160635 | 1.6167 | 0.9418 |
val: {'recall': 0.995369, 'recall_grapheme': 0.99337, 'recall_vowel': 0.997129, 'recall_consonant': 0.997607, 'acc_grapheme': 0.993061, 'acc_vowel': 0.997314, 'acc_consonant': 0.997836, 'loss_grapheme': 0.053603, 'loss_vowel': 0.047442, 'loss_consonant': 0.030336}
  153 | 0.000015 | 017664/160635 | 1.9491 | 0.9719 |
val: {'recall': 0.99535, 'recall_grapheme': 0.993089, 'recall_vowel': 0.997371, 'recall_consonant': 0.997852, 'acc_grapheme': 0.992886, 'acc_vowel': 0.997413, 'acc_consonant': 0.997911, 'loss_grapheme': 0.050413, 'loss_vowel': 0.043729, 'loss_consonant': 0.027799}
  154 | 0.000026 | 010752/160635 | 0.5529 | 0.

  176 | 0.000015 | 012288/160635 | 0.0069 | 0.7804 |
val: {'recall': 0.996229, 'recall_grapheme': 0.99442, 'recall_vowel': 0.998116, 'recall_consonant': 0.997961, 'acc_grapheme': 0.994429, 'acc_vowel': 0.997886, 'acc_consonant': 0.998334, 'loss_grapheme': 0.03338, 'loss_vowel': 0.023859, 'loss_consonant': 0.016146}
  177 | 0.000026 | 005376/160635 | 1.6301 | 1.1908 |
val: {'recall': 0.995366, 'recall_grapheme': 0.993325, 'recall_vowel': 0.997324, 'recall_consonant': 0.997491, 'acc_grapheme': 0.993558, 'acc_vowel': 0.997413, 'acc_consonant': 0.99796, 'loss_grapheme': 0.053936, 'loss_vowel': 0.046522, 'loss_consonant': 0.03066}
  177 | 0.000038 | 158976/160635 | 1.5745 | 0.9867 |
val: {'recall': 0.995656, 'recall_grapheme': 0.993776, 'recall_vowel': 0.997664, 'recall_consonant': 0.997408, 'acc_grapheme': 0.993583, 'acc_vowel': 0.997538, 'acc_consonant': 0.997985, 'loss_grapheme': 0.069683, 'loss_vowel': 0.055781, 'loss_consonant': 0.038163}
  178 | 0.000050 | 152064/160635 | 0.0041 | 1.0

  200 | 0.000038 | 153600/160635 | 0.0174 | 0.9570 |
val: {'recall': 0.995923, 'recall_grapheme': 0.994982, 'recall_vowel': 0.997364, 'recall_consonant': 0.996363, 'acc_grapheme': 0.994553, 'acc_vowel': 0.99796, 'acc_consonant': 0.99811, 'loss_grapheme': 0.02816, 'loss_vowel': 0.016407, 'loss_consonant': 0.012094}
  201 | 0.000051 | 146688/160635 | 2.0613 | 1.0812 |
val: {'recall': 0.995229, 'recall_grapheme': 0.993138, 'recall_vowel': 0.996965, 'recall_consonant': 0.997673, 'acc_grapheme': 0.99316, 'acc_vowel': 0.997388, 'acc_consonant': 0.998234, 'loss_grapheme': 0.069777, 'loss_vowel': 0.052476, 'loss_consonant': 0.035723}
  202 | 0.000063 | 139776/160635 | 0.0076 | 1.0752 |
val: {'recall': 0.996016, 'recall_grapheme': 0.994547, 'recall_vowel': 0.997224, 'recall_consonant': 0.997744, 'acc_grapheme': 0.993881, 'acc_vowel': 0.997562, 'acc_consonant': 0.998184, 'loss_grapheme': 0.037891, 'loss_vowel': 0.026039, 'loss_consonant': 0.016262}
  203 | 0.000075 | 132864/160635 | 0.5984 | 0.9

KeyboardInterrupt: 

In [44]:
train(args)

{'recall': 0.991643, 'recall_grapheme': 0.987994, 'recall_vowel': 0.995136, 'recall_consonant': 0.995447, 'acc_grapheme': 0.987439, 'acc_vowel': 0.99515, 'acc_consonant': 0.996095, 'loss_grapheme': 0.16107, 'loss_vowel': 0.121419, 'loss_consonant': 0.08749}
    0 | 0.000020 | 153600/160635 | 1.8225 | 1.0988 |
val: {'recall': 0.991303, 'recall_grapheme': 0.987622, 'recall_vowel': 0.994785, 'recall_consonant': 0.99518, 'acc_grapheme': 0.986544, 'acc_vowel': 0.995274, 'acc_consonant': 0.996045, 'loss_grapheme': 0.095816, 'loss_vowel': 0.073148, 'loss_consonant': 0.051373}
    1 | 0.000020 | 146688/160635 | 1.9591 | 1.3681 |
val: {'recall': 0.99159, 'recall_grapheme': 0.988516, 'recall_vowel': 0.994824, 'recall_consonant': 0.994504, 'acc_grapheme': 0.98826, 'acc_vowel': 0.995473, 'acc_consonant': 0.996568, 'loss_grapheme': 0.128494, 'loss_vowel': 0.11986, 'loss_consonant': 0.076155}
    2 | 0.000020 | 139776/160635 | 2.3023 | 1.1676 |
val: {'recall': 0.99295, 'recall_grapheme': 0.990509, '

KeyboardInterrupt: 

In [37]:
train(args)

{'recall': 0.964754, 'recall_grapheme': 0.945865, 'recall_vowel': 0.981888, 'recall_consonant': 0.985398, 'acc_grapheme': 0.942221, 'acc_vowel': 0.979132, 'acc_consonant': 0.978784, 'loss_grapheme': 0.446205, 'loss_vowel': 0.209152, 'loss_consonant': 0.173068}
    0 | 0.000050 | 153600/160635 | 2.0984 | 2.0929 |
val: {'recall': 0.987377, 'recall_grapheme': 0.982044, 'recall_vowel': 0.993094, 'recall_consonant': 0.992325, 'acc_grapheme': 0.982763, 'acc_vowel': 0.993732, 'acc_consonant': 0.99423, 'loss_grapheme': 0.182521, 'loss_vowel': 0.127602, 'loss_consonant': 0.087338}
** saved




    1 | 0.000049 | 146688/160635 | 1.1326 | 2.0749 |
val: {'recall': 0.988424, 'recall_grapheme': 0.983784, 'recall_vowel': 0.993348, 'recall_consonant': 0.992778, 'acc_grapheme': 0.983808, 'acc_vowel': 0.993682, 'acc_consonant': 0.994478, 'loss_grapheme': 0.193488, 'loss_vowel': 0.145329, 'loss_consonant': 0.100129}
** saved
    2 | 0.000047 | 139776/160635 | 2.8045 | 2.0387 |
val: {'recall': 0.989167, 'recall_grapheme': 0.984252, 'recall_vowel': 0.993671, 'recall_consonant': 0.994493, 'acc_grapheme': 0.984455, 'acc_vowel': 0.994031, 'acc_consonant': 0.994702, 'loss_grapheme': 0.209331, 'loss_vowel': 0.179077, 'loss_consonant': 0.107373}
** saved
    3 | 0.000043 | 132864/160635 | 1.2420 | 2.0176 |
val: {'recall': 0.989761, 'recall_grapheme': 0.985813, 'recall_vowel': 0.993682, 'recall_consonant': 0.993737, 'acc_grapheme': 0.984082, 'acc_vowel': 0.993832, 'acc_consonant': 0.994777, 'loss_grapheme': 0.232536, 'loss_vowel': 0.20679, 'loss_consonant': 0.126389}
** saved
    4 | 0.000038 

   26 | 0.000043 | 127488/160635 | 0.5985 | 1.9466 |
val: {'recall': 0.989934, 'recall_grapheme': 0.985778, 'recall_vowel': 0.994147, 'recall_consonant': 0.994034, 'acc_grapheme': 0.986047, 'acc_vowel': 0.994727, 'acc_consonant': 0.995349, 'loss_grapheme': 0.162728, 'loss_vowel': 0.12239, 'loss_consonant': 0.085427}
   27 | 0.000038 | 120576/160635 | 1.7230 | 2.0704 |
val: {'recall': 0.990313, 'recall_grapheme': 0.986359, 'recall_vowel': 0.994044, 'recall_consonant': 0.994491, 'acc_grapheme': 0.985375, 'acc_vowel': 0.994926, 'acc_consonant': 0.995374, 'loss_grapheme': 0.184037, 'loss_vowel': 0.165725, 'loss_consonant': 0.102966}
   28 | 0.000032 | 113664/160635 | 1.0739 | 1.8342 |
val: {'recall': 0.991416, 'recall_grapheme': 0.987833, 'recall_vowel': 0.99478, 'recall_consonant': 0.99522, 'acc_grapheme': 0.986842, 'acc_vowel': 0.9952, 'acc_consonant': 0.995797, 'loss_grapheme': 0.157744, 'loss_vowel': 0.121454, 'loss_consonant': 0.083955}
** saved
   29 | 0.000026 | 106752/160635 | 3.01

KeyboardInterrupt: 

In [79]:
train(args)

{'recall': 0.997401, 'recall_grapheme': 0.996331, 'recall_vowel': 0.998438, 'recall_consonant': 0.998503, 'acc_grapheme': 0.995961, 'acc_vowel': 0.998479, 'acc_consonant': 0.998354, 'loss_grapheme': 0.019338, 'loss_vowel': 0.009742, 'loss_consonant': 0.007437}
    0 | 0.000050 | 153600/160735 | 1.1514 | 1.0147 |
val: {'recall': 0.99651, 'recall_grapheme': 0.994837, 'recall_vowel': 0.99797, 'recall_consonant': 0.998399, 'acc_grapheme': 0.994664, 'acc_vowel': 0.997905, 'acc_consonant': 0.997905, 'loss_grapheme': 0.033601, 'loss_vowel': 0.019487, 'loss_consonant': 0.013147}




    1 | 0.000049 | 146688/160735 | 1.7823 | 1.1079 |
val: {'recall': 0.99458, 'recall_grapheme': 0.992724, 'recall_vowel': 0.997169, 'recall_consonant': 0.995701, 'acc_grapheme': 0.99227, 'acc_vowel': 0.997507, 'acc_consonant': 0.997357, 'loss_grapheme': 0.05443, 'loss_vowel': 0.035074, 'loss_consonant': 0.024806}
    2 | 0.000047 | 139776/160735 | 1.3825 | 0.9587 |
val: {'recall': 0.996538, 'recall_grapheme': 0.995082, 'recall_vowel': 0.997917, 'recall_consonant': 0.998071, 'acc_grapheme': 0.995287, 'acc_vowel': 0.99803, 'acc_consonant': 0.998105, 'loss_grapheme': 0.026622, 'loss_vowel': 0.015517, 'loss_consonant': 0.010374}
    3 | 0.000043 | 132864/160735 | 0.2587 | 0.9630 |
val: {'recall': 0.993723, 'recall_grapheme': 0.991549, 'recall_vowel': 0.99695, 'recall_consonant': 0.994846, 'acc_grapheme': 0.991772, 'acc_vowel': 0.997133, 'acc_consonant': 0.997058, 'loss_grapheme': 0.044781, 'loss_vowel': 0.020896, 'loss_consonant': 0.016922}
    4 | 0.000038 | 125952/160735 | 2.5432 | 1.12

   26 | 0.000043 | 127488/160735 | 0.0047 | 0.9961 |
val: {'recall': 0.996007, 'recall_grapheme': 0.994401, 'recall_vowel': 0.997091, 'recall_consonant': 0.998135, 'acc_grapheme': 0.993941, 'acc_vowel': 0.997681, 'acc_consonant': 0.997881, 'loss_grapheme': 0.030932, 'loss_vowel': 0.016257, 'loss_consonant': 0.012754}
   27 | 0.000038 | 120576/160735 | 2.9881 | 1.1158 |
val: {'recall': 0.994956, 'recall_grapheme': 0.992688, 'recall_vowel': 0.997026, 'recall_consonant': 0.997421, 'acc_grapheme': 0.992046, 'acc_vowel': 0.997531, 'acc_consonant': 0.997507, 'loss_grapheme': 0.04463, 'loss_vowel': 0.02532, 'loss_consonant': 0.018043}
   28 | 0.000032 | 113664/160735 | 1.9251 | 0.9913 |
val: {'recall': 0.991583, 'recall_grapheme': 0.989058, 'recall_vowel': 0.996288, 'recall_consonant': 0.99193, 'acc_grapheme': 0.98843, 'acc_vowel': 0.996559, 'acc_consonant': 0.996035, 'loss_grapheme': 0.049937, 'loss_vowel': 0.01845, 'loss_consonant': 0.016871}
   29 | 0.000026 | 106752/160735 | 0.0038 | 1.23

KeyboardInterrupt: 

In [37]:
train(args)

{'recall': 0.994807, 'recall_grapheme': 0.993211, 'recall_vowel': 0.997174, 'recall_consonant': 0.995633, 'acc_grapheme': 0.992968, 'acc_vowel': 0.997357, 'acc_consonant': 0.997033, 'loss_grapheme': 0.136084, 'loss_vowel': 0.089044, 'loss_consonant': 0.061338}
    0 | 0.000100 | 153600/160735 | 0.0037 | 1.1513 |
val: {'recall': 0.989714, 'recall_grapheme': 0.984654, 'recall_vowel': 0.994842, 'recall_consonant': 0.994707, 'acc_grapheme': 0.985538, 'acc_vowel': 0.995287, 'acc_consonant': 0.99424, 'loss_grapheme': 0.103949, 'loss_vowel': 0.063651, 'loss_consonant': 0.046677}




    1 | 0.000098 | 146688/160735 | 1.0486 | 1.0954 |
val: {'recall': 0.992231, 'recall_grapheme': 0.989128, 'recall_vowel': 0.995653, 'recall_consonant': 0.995015, 'acc_grapheme': 0.988854, 'acc_vowel': 0.99616, 'acc_consonant': 0.995537, 'loss_grapheme': 0.10485, 'loss_vowel': 0.072468, 'loss_consonant': 0.047801}
    2 | 0.000093 | 139776/160735 | 1.8618 | 1.0786 |
val: {'recall': 0.992574, 'recall_grapheme': 0.989244, 'recall_vowel': 0.995393, 'recall_consonant': 0.996413, 'acc_grapheme': 0.989602, 'acc_vowel': 0.99606, 'acc_consonant': 0.995886, 'loss_grapheme': 0.117554, 'loss_vowel': 0.077483, 'loss_consonant': 0.056601}
    3 | 0.000086 | 132864/160735 | 1.5228 | 1.0907 |
val: {'recall': 0.993971, 'recall_grapheme': 0.991248, 'recall_vowel': 0.996915, 'recall_consonant': 0.996473, 'acc_grapheme': 0.99045, 'acc_vowel': 0.997008, 'acc_consonant': 0.996584, 'loss_grapheme': 0.080691, 'loss_vowel': 0.054964, 'loss_consonant': 0.03495}
    4 | 0.000075 | 125952/160735 | 0.0120 | 1.05

** saved
   26 | 0.000086 | 127488/160735 | 3.4098 | 1.1860 |
val: {'recall': 0.99461, 'recall_grapheme': 0.991818, 'recall_vowel': 0.997081, 'recall_consonant': 0.997721, 'acc_grapheme': 0.991672, 'acc_vowel': 0.996808, 'acc_consonant': 0.996783, 'loss_grapheme': 0.064706, 'loss_vowel': 0.048355, 'loss_consonant': 0.035835}
   27 | 0.000075 | 120576/160735 | 0.0073 | 1.0463 |
val: {'recall': 0.994773, 'recall_grapheme': 0.992577, 'recall_vowel': 0.997597, 'recall_consonant': 0.996342, 'acc_grapheme': 0.99257, 'acc_vowel': 0.997606, 'acc_consonant': 0.997182, 'loss_grapheme': 0.043618, 'loss_vowel': 0.028452, 'loss_consonant': 0.022686}
   28 | 0.000063 | 113664/160735 | 2.6333 | 1.1025 |
val: {'recall': 0.993794, 'recall_grapheme': 0.991102, 'recall_vowel': 0.996929, 'recall_consonant': 0.996045, 'acc_grapheme': 0.9905, 'acc_vowel': 0.997083, 'acc_consonant': 0.996584, 'loss_grapheme': 0.074834, 'loss_vowel': 0.060056, 'loss_consonant': 0.045007}
   29 | 0.000051 | 106752/160735 | 0.5

   51 | 0.000063 | 108288/160735 | 1.5406 | 0.9697 |
val: {'recall': 0.993109, 'recall_grapheme': 0.990217, 'recall_vowel': 0.996522, 'recall_consonant': 0.995479, 'acc_grapheme': 0.988929, 'acc_vowel': 0.996833, 'acc_consonant': 0.995961, 'loss_grapheme': 0.056458, 'loss_vowel': 0.034883, 'loss_consonant': 0.031278}
   52 | 0.000051 | 101376/160735 | 0.0033 | 1.1372 |
val: {'recall': 0.995313, 'recall_grapheme': 0.99295, 'recall_vowel': 0.997803, 'recall_consonant': 0.997548, 'acc_grapheme': 0.992495, 'acc_vowel': 0.997756, 'acc_consonant': 0.997182, 'loss_grapheme': 0.040742, 'loss_vowel': 0.026209, 'loss_consonant': 0.020372}
   53 | 0.000038 | 094464/160735 | 0.0121 | 0.8790 |
val: {'recall': 0.996658, 'recall_grapheme': 0.994888, 'recall_vowel': 0.998465, 'recall_consonant': 0.998392, 'acc_grapheme': 0.994888, 'acc_vowel': 0.99828, 'acc_consonant': 0.99793, 'loss_grapheme': 0.031662, 'loss_vowel': 0.017433, 'loss_consonant': 0.013885}
** saved
   54 | 0.000026 | 087552/160735 | 0.

   76 | 0.000038 | 089088/160735 | 2.3035 | 1.0684 |
val: {'recall': 0.995749, 'recall_grapheme': 0.993691, 'recall_vowel': 0.997361, 'recall_consonant': 0.998251, 'acc_grapheme': 0.993143, 'acc_vowel': 0.997656, 'acc_consonant': 0.997432, 'loss_grapheme': 0.037444, 'loss_vowel': 0.021558, 'loss_consonant': 0.019059}
   77 | 0.000026 | 082176/160735 | 0.0068 | 1.0897 |
val: {'recall': 0.995529, 'recall_grapheme': 0.993215, 'recall_vowel': 0.997974, 'recall_consonant': 0.99771, 'acc_grapheme': 0.993118, 'acc_vowel': 0.997856, 'acc_consonant': 0.997656, 'loss_grapheme': 0.035894, 'loss_vowel': 0.020457, 'loss_consonant': 0.016028}
   78 | 0.000015 | 075264/160735 | 0.0054 | 0.8368 |
val: {'recall': 0.996525, 'recall_grapheme': 0.994725, 'recall_vowel': 0.998073, 'recall_consonant': 0.998578, 'acc_grapheme': 0.99439, 'acc_vowel': 0.998055, 'acc_consonant': 0.998205, 'loss_grapheme': 0.027531, 'loss_vowel': 0.013278, 'loss_consonant': 0.010686}
   79 | 0.000008 | 068352/160735 | 1.2188 | 1

  101 | 0.000015 | 069888/160735 | 1.6693 | 0.9381 |
val: {'recall': 0.996133, 'recall_grapheme': 0.994301, 'recall_vowel': 0.997649, 'recall_consonant': 0.99828, 'acc_grapheme': 0.993667, 'acc_vowel': 0.997831, 'acc_consonant': 0.997955, 'loss_grapheme': 0.037403, 'loss_vowel': 0.02273, 'loss_consonant': 0.016761}
  102 | 0.000008 | 062976/160735 | 0.0031 | 0.9741 |
val: {'recall': 0.997018, 'recall_grapheme': 0.995462, 'recall_vowel': 0.998495, 'recall_consonant': 0.998655, 'acc_grapheme': 0.995088, 'acc_vowel': 0.998354, 'acc_consonant': 0.998404, 'loss_grapheme': 0.02418, 'loss_vowel': 0.011886, 'loss_consonant': 0.008562}
  103 | 0.000003 | 056064/160735 | 1.6289 | 0.8747 |
val: {'recall': 0.996627, 'recall_grapheme': 0.995, 'recall_vowel': 0.998122, 'recall_consonant': 0.998386, 'acc_grapheme': 0.99429, 'acc_vowel': 0.998055, 'acc_consonant': 0.99808, 'loss_grapheme': 0.031662, 'loss_vowel': 0.018117, 'loss_consonant': 0.013088}
  104 | 0.000001 | 049152/160735 | 0.0041 | 0.9578 

  126 | 0.000003 | 050688/160735 | 2.1043 | 0.9254 |
val: {'recall': 0.996198, 'recall_grapheme': 0.994444, 'recall_vowel': 0.998057, 'recall_consonant': 0.997847, 'acc_grapheme': 0.994315, 'acc_vowel': 0.997955, 'acc_consonant': 0.99808, 'loss_grapheme': 0.032837, 'loss_vowel': 0.021154, 'loss_consonant': 0.015282}
  127 | 0.000001 | 043776/160735 | 0.0213 | 1.0302 |
val: {'recall': 0.995947, 'recall_grapheme': 0.994032, 'recall_vowel': 0.997894, 'recall_consonant': 0.997829, 'acc_grapheme': 0.993567, 'acc_vowel': 0.997905, 'acc_consonant': 0.997905, 'loss_grapheme': 0.030067, 'loss_vowel': 0.014481, 'loss_consonant': 0.011062}
  128 | 0.000003 | 036864/160735 | 0.4714 | 1.1853 |
val: {'recall': 0.995707, 'recall_grapheme': 0.993753, 'recall_vowel': 0.99765, 'recall_consonant': 0.997672, 'acc_grapheme': 0.993118, 'acc_vowel': 0.997781, 'acc_consonant': 0.997581, 'loss_grapheme': 0.048194, 'loss_vowel': 0.033156, 'loss_consonant': 0.02459}
  129 | 0.000008 | 029952/160735 | 1.4470 | 0.

  151 | 0.000003 | 031488/160735 | 2.0051 | 1.0539 |
val: {'recall': 0.99659, 'recall_grapheme': 0.99497, 'recall_vowel': 0.997655, 'recall_consonant': 0.998764, 'acc_grapheme': 0.994564, 'acc_vowel': 0.998055, 'acc_consonant': 0.998205, 'loss_grapheme': 0.029236, 'loss_vowel': 0.017448, 'loss_consonant': 0.011928}
  152 | 0.000008 | 024576/160735 | 1.1848 | 0.8531 |
val: {'recall': 0.997071, 'recall_grapheme': 0.995564, 'recall_vowel': 0.998247, 'recall_consonant': 0.99891, 'acc_grapheme': 0.995362, 'acc_vowel': 0.998329, 'acc_consonant': 0.998479, 'loss_grapheme': 0.023826, 'loss_vowel': 0.013686, 'loss_consonant': 0.009157}
  153 | 0.000015 | 017664/160735 | 2.1579 | 0.9579 |
val: {'recall': 0.996451, 'recall_grapheme': 0.994517, 'recall_vowel': 0.998093, 'recall_consonant': 0.998676, 'acc_grapheme': 0.994265, 'acc_vowel': 0.99813, 'acc_consonant': 0.99808, 'loss_grapheme': 0.037137, 'loss_vowel': 0.022186, 'loss_consonant': 0.015718}
  154 | 0.000026 | 010752/160735 | 2.4124 | 1.16

  176 | 0.000015 | 012288/160735 | 2.4402 | 0.9590 |
val: {'recall': 0.995664, 'recall_grapheme': 0.993392, 'recall_vowel': 0.997608, 'recall_consonant': 0.998265, 'acc_grapheme': 0.993218, 'acc_vowel': 0.997756, 'acc_consonant': 0.997731, 'loss_grapheme': 0.032383, 'loss_vowel': 0.015922, 'loss_consonant': 0.012419}
  177 | 0.000026 | 005376/160735 | 1.4361 | 1.1820 |
val: {'recall': 0.99608, 'recall_grapheme': 0.9943, 'recall_vowel': 0.997567, 'recall_consonant': 0.998153, 'acc_grapheme': 0.993916, 'acc_vowel': 0.997806, 'acc_consonant': 0.997856, 'loss_grapheme': 0.028746, 'loss_vowel': 0.01329, 'loss_consonant': 0.010584}
  177 | 0.000038 | 158976/160735 | 0.0045 | 0.9369 |
val: {'recall': 0.996069, 'recall_grapheme': 0.994068, 'recall_vowel': 0.997655, 'recall_consonant': 0.998485, 'acc_grapheme': 0.993667, 'acc_vowel': 0.997856, 'acc_consonant': 0.99793, 'loss_grapheme': 0.033645, 'loss_vowel': 0.018588, 'loss_consonant': 0.013091}
  178 | 0.000050 | 152064/160735 | 0.0043 | 1.01

KeyboardInterrupt: 

In [None]:
train(args)

{'recall': 0.968796, 'recall_grapheme': 0.954354, 'recall_vowel': 0.981436, 'recall_consonant': 0.985042, 'acc_grapheme': 0.950954, 'acc_vowel': 0.981798, 'acc_consonant': 0.982496, 'loss_grapheme': 0.378759, 'loss_vowel': 0.200741, 'loss_consonant': 0.14877}
    0 | 0.000100 | 153600/160735 | 2.7201 | 2.1176 |
val: {'recall': 0.987331, 'recall_grapheme': 0.98181, 'recall_vowel': 0.994441, 'recall_consonant': 0.991263, 'acc_grapheme': 0.982521, 'acc_vowel': 0.994714, 'acc_consonant': 0.99424, 'loss_grapheme': 0.20238, 'loss_vowel': 0.171463, 'loss_consonant': 0.109966}
** saved




    1 | 0.000098 | 146688/160735 | 1.9706 | 2.0632 |
val: {'recall': 0.98721, 'recall_grapheme': 0.981044, 'recall_vowel': 0.993634, 'recall_consonant': 0.993119, 'acc_grapheme': 0.98297, 'acc_vowel': 0.994564, 'acc_consonant': 0.994789, 'loss_grapheme': 0.193056, 'loss_vowel': 0.165949, 'loss_consonant': 0.103149}
    2 | 0.000093 | 139776/160735 | 1.5632 | 1.9585 |
val: {'recall': 0.988972, 'recall_grapheme': 0.983604, 'recall_vowel': 0.993653, 'recall_consonant': 0.995028, 'acc_grapheme': 0.983992, 'acc_vowel': 0.994689, 'acc_consonant': 0.994639, 'loss_grapheme': 0.184705, 'loss_vowel': 0.15318, 'loss_consonant': 0.105877}
** saved
    3 | 0.000086 | 132864/160735 | 1.7528 | 2.1131 |
val: {'recall': 0.989968, 'recall_grapheme': 0.985642, 'recall_vowel': 0.994857, 'recall_consonant': 0.993729, 'acc_grapheme': 0.98469, 'acc_vowel': 0.995437, 'acc_consonant': 0.995138, 'loss_grapheme': 0.207736, 'loss_vowel': 0.187592, 'loss_consonant': 0.11266}
** saved
    4 | 0.000075 | 125952/1607

   26 | 0.000086 | 127488/160735 | 0.6730 | 1.8537 |
val: {'recall': 0.986989, 'recall_grapheme': 0.980761, 'recall_vowel': 0.993584, 'recall_consonant': 0.992849, 'acc_grapheme': 0.983369, 'acc_vowel': 0.994714, 'acc_consonant': 0.994514, 'loss_grapheme': 0.165355, 'loss_vowel': 0.096894, 'loss_consonant': 0.069883}
   27 | 0.000075 | 120576/160735 | 1.5488 | 1.8665 |
val: {'recall': 0.989669, 'recall_grapheme': 0.985103, 'recall_vowel': 0.994121, 'recall_consonant': 0.994349, 'acc_grapheme': 0.984241, 'acc_vowel': 0.995437, 'acc_consonant': 0.994664, 'loss_grapheme': 0.199886, 'loss_vowel': 0.181033, 'loss_consonant': 0.106551}
   28 | 0.000063 | 113664/160735 | 1.7374 | 1.8715 |
val: {'recall': 0.991073, 'recall_grapheme': 0.986988, 'recall_vowel': 0.99497, 'recall_consonant': 0.995346, 'acc_grapheme': 0.987408, 'acc_vowel': 0.995562, 'acc_consonant': 0.995312, 'loss_grapheme': 0.119674, 'loss_vowel': 0.082174, 'loss_consonant': 0.058381}
   29 | 0.000051 | 106752/160735 | 1.5602 | 

   51 | 0.000063 | 108288/160735 | 0.8534 | 1.9240 |
val: {'recall': 0.992213, 'recall_grapheme': 0.988386, 'recall_vowel': 0.996299, 'recall_consonant': 0.995783, 'acc_grapheme': 0.988131, 'acc_vowel': 0.995986, 'acc_consonant': 0.995886, 'loss_grapheme': 0.172872, 'loss_vowel': 0.14041, 'loss_consonant': 0.089729}
   52 | 0.000051 | 101376/160735 | 1.8692 | 1.8458 |
val: {'recall': 0.992772, 'recall_grapheme': 0.989642, 'recall_vowel': 0.996197, 'recall_consonant': 0.995609, 'acc_grapheme': 0.989453, 'acc_vowel': 0.996335, 'acc_consonant': 0.995886, 'loss_grapheme': 0.135381, 'loss_vowel': 0.090325, 'loss_consonant': 0.06973}
** saved
   53 | 0.000038 | 094464/160735 | 0.6151 | 1.8515 |
val: {'recall': 0.992988, 'recall_grapheme': 0.989467, 'recall_vowel': 0.995835, 'recall_consonant': 0.997184, 'acc_grapheme': 0.988979, 'acc_vowel': 0.996509, 'acc_consonant': 0.99631, 'loss_grapheme': 0.170543, 'loss_vowel': 0.143205, 'loss_consonant': 0.086028}
** saved
   54 | 0.000026 | 087552/16

   76 | 0.000038 | 089088/160735 | 1.7339 | 1.7588 |
val: {'recall': 0.992533, 'recall_grapheme': 0.989128, 'recall_vowel': 0.995878, 'recall_consonant': 0.995998, 'acc_grapheme': 0.990101, 'acc_vowel': 0.996808, 'acc_consonant': 0.996609, 'loss_grapheme': 0.126719, 'loss_vowel': 0.091811, 'loss_consonant': 0.060809}
   77 | 0.000026 | 082176/160735 | 2.1953 | 1.9615 |
val: {'recall': 0.991878, 'recall_grapheme': 0.98777, 'recall_vowel': 0.99631, 'recall_consonant': 0.995664, 'acc_grapheme': 0.988006, 'acc_vowel': 0.996783, 'acc_consonant': 0.995736, 'loss_grapheme': 0.199839, 'loss_vowel': 0.195173, 'loss_consonant': 0.119304}
   78 | 0.000015 | 075264/160735 | 1.9683 | 1.7289 |
val: {'recall': 0.991602, 'recall_grapheme': 0.987989, 'recall_vowel': 0.995791, 'recall_consonant': 0.99464, 'acc_grapheme': 0.988331, 'acc_vowel': 0.996509, 'acc_consonant': 0.995961, 'loss_grapheme': 0.181821, 'loss_vowel': 0.16174, 'loss_consonant': 0.100145}
   79 | 0.000008 | 068352/160735 | 1.5132 | 1.8

** saved
  101 | 0.000015 | 069888/160735 | 2.0242 | 1.6856 |
val: {'recall': 0.992789, 'recall_grapheme': 0.990106, 'recall_vowel': 0.996376, 'recall_consonant': 0.994569, 'acc_grapheme': 0.990874, 'acc_vowel': 0.996883, 'acc_consonant': 0.996384, 'loss_grapheme': 0.167718, 'loss_vowel': 0.110026, 'loss_consonant': 0.082676}
  102 | 0.000008 | 062976/160735 | 2.4685 | 1.7904 |
val: {'recall': 0.993504, 'recall_grapheme': 0.990287, 'recall_vowel': 0.996599, 'recall_consonant': 0.996845, 'acc_grapheme': 0.989752, 'acc_vowel': 0.997083, 'acc_consonant': 0.996335, 'loss_grapheme': 0.170766, 'loss_vowel': 0.149114, 'loss_consonant': 0.089225}
  103 | 0.000003 | 056064/160735 | 2.1733 | 1.8125 |
val: {'recall': 0.993311, 'recall_grapheme': 0.989895, 'recall_vowel': 0.996625, 'recall_consonant': 0.996829, 'acc_grapheme': 0.989702, 'acc_vowel': 0.997083, 'acc_consonant': 0.996484, 'loss_grapheme': 0.186024, 'loss_vowel': 0.164717, 'loss_consonant': 0.096067}
  104 | 0.000001 | 049152/160735 |

  126 | 0.000003 | 050688/160735 | 1.1572 | 1.8722 |
val: {'recall': 0.994112, 'recall_grapheme': 0.991223, 'recall_vowel': 0.996817, 'recall_consonant': 0.997184, 'acc_grapheme': 0.991547, 'acc_vowel': 0.997157, 'acc_consonant': 0.996759, 'loss_grapheme': 0.179152, 'loss_vowel': 0.144877, 'loss_consonant': 0.087624}
  127 | 0.000001 | 043776/160735 | 2.4575 | 1.7891 |
val: {'recall': 0.993426, 'recall_grapheme': 0.989912, 'recall_vowel': 0.996859, 'recall_consonant': 0.997023, 'acc_grapheme': 0.990026, 'acc_vowel': 0.997033, 'acc_consonant': 0.99616, 'loss_grapheme': 0.176595, 'loss_vowel': 0.157275, 'loss_consonant': 0.090055}
  128 | 0.000003 | 036864/160735 | 1.3953 | 1.7547 |
val: {'recall': 0.993973, 'recall_grapheme': 0.990907, 'recall_vowel': 0.996913, 'recall_consonant': 0.997167, 'acc_grapheme': 0.990699, 'acc_vowel': 0.997157, 'acc_consonant': 0.996434, 'loss_grapheme': 0.160871, 'loss_vowel': 0.133616, 'loss_consonant': 0.079128}
  129 | 0.000008 | 029952/160735 | 3.5278 | 

** saved
  151 | 0.000003 | 031488/160735 | 1.4664 | 1.8525 |
val: {'recall': 0.993524, 'recall_grapheme': 0.990969, 'recall_vowel': 0.996435, 'recall_consonant': 0.995726, 'acc_grapheme': 0.990051, 'acc_vowel': 0.996883, 'acc_consonant': 0.996434, 'loss_grapheme': 0.169892, 'loss_vowel': 0.152663, 'loss_consonant': 0.094209}
  152 | 0.000008 | 024576/160735 | 0.8803 | 1.5736 |
val: {'recall': 0.994322, 'recall_grapheme': 0.992729, 'recall_vowel': 0.996545, 'recall_consonant': 0.995284, 'acc_grapheme': 0.992669, 'acc_vowel': 0.996958, 'acc_consonant': 0.996933, 'loss_grapheme': 0.121052, 'loss_vowel': 0.078456, 'loss_consonant': 0.056436}
  153 | 0.000015 | 017664/160735 | 1.7538 | 1.7494 |
val: {'recall': 0.993763, 'recall_grapheme': 0.990911, 'recall_vowel': 0.996625, 'recall_consonant': 0.996606, 'acc_grapheme': 0.9903, 'acc_vowel': 0.997033, 'acc_consonant': 0.996833, 'loss_grapheme': 0.163015, 'loss_vowel': 0.146479, 'loss_consonant': 0.086465}
  154 | 0.000026 | 010752/160735 | 2

** saved
  176 | 0.000015 | 012288/160735 | 1.9095 | 1.9380 |
val: {'recall': 0.992947, 'recall_grapheme': 0.989441, 'recall_vowel': 0.995992, 'recall_consonant': 0.996914, 'acc_grapheme': 0.988854, 'acc_vowel': 0.996634, 'acc_consonant': 0.99611, 'loss_grapheme': 0.148052, 'loss_vowel': 0.122292, 'loss_consonant': 0.080409}
  177 | 0.000026 | 005376/160735 | 2.1672 | 1.8442 |
val: {'recall': 0.993734, 'recall_grapheme': 0.990806, 'recall_vowel': 0.996438, 'recall_consonant': 0.996885, 'acc_grapheme': 0.990176, 'acc_vowel': 0.996908, 'acc_consonant': 0.996534, 'loss_grapheme': 0.152286, 'loss_vowel': 0.113841, 'loss_consonant': 0.07727}
  177 | 0.000038 | 158976/160735 | 2.0699 | 1.8261 |
val: {'recall': 0.992541, 'recall_grapheme': 0.988906, 'recall_vowel': 0.99569, 'recall_consonant': 0.99666, 'acc_grapheme': 0.988181, 'acc_vowel': 0.996459, 'acc_consonant': 0.995761, 'loss_grapheme': 0.155896, 'loss_vowel': 0.130231, 'loss_consonant': 0.088185}
  178 | 0.000050 | 152064/160735 | 1.8

  200 | 0.000038 | 153600/160735 | 0.7268 | 1.6830 |
val: {'recall': 0.993738, 'recall_grapheme': 0.991404, 'recall_vowel': 0.996603, 'recall_consonant': 0.995542, 'acc_grapheme': 0.991248, 'acc_vowel': 0.996983, 'acc_consonant': 0.996858, 'loss_grapheme': 0.096999, 'loss_vowel': 0.065398, 'loss_consonant': 0.048689}
  201 | 0.000051 | 146688/160735 | 1.5096 | 1.7420 |
val: {'recall': 0.992967, 'recall_grapheme': 0.989507, 'recall_vowel': 0.99619, 'recall_consonant': 0.996662, 'acc_grapheme': 0.988854, 'acc_vowel': 0.996709, 'acc_consonant': 0.995936, 'loss_grapheme': 0.095177, 'loss_vowel': 0.064534, 'loss_consonant': 0.051418}
  202 | 0.000063 | 139776/160735 | 1.7080 | 1.7172 |
val: {'recall': 0.992058, 'recall_grapheme': 0.98805, 'recall_vowel': 0.996103, 'recall_consonant': 0.996028, 'acc_grapheme': 0.987508, 'acc_vowel': 0.996409, 'acc_consonant': 0.995661, 'loss_grapheme': 0.097064, 'loss_vowel': 0.073032, 'loss_consonant': 0.060751}
  203 | 0.000075 | 132864/160735 | 0.9834 | 1

In [None]:
#save_model(model, model_file)

In [39]:
del model