In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet
from cvcore.data.auto_augment import RandAugment
from PIL import Image

In [2]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [3]:
#!ls /home/chec/data/bengali

In [4]:
DATA_DIR = '/home/chec/data/bengali'

In [5]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [7]:
HEIGHT = 137
WIDTH = 236

In [8]:
#import albumentations as albu
def get_train_augs():
    return RandAugment(n=2, m=27)

In [9]:
#plt.imshow(x)

In [10]:
#np.arange(10).copy()

In [11]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        orig_img = img.copy()
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            #img = augs(image=img)['image']
            img = np.asarray(augs(Image.fromarray(img)))
        
        img = np.expand_dims(img, axis=-1)
        orig_img = np.expand_dims(orig_img, axis=-1)
        
        #print('###', img.shape)
        #img = np.concatenate([img, img, img], 2)
        #print('>>>', img.shape)
        
        # taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
        #MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
        #STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]
        
        img = transforms.functional.to_tensor(img)
        orig_img = transforms.functional.to_tensor(orig_img)
        
        #img = transforms.functional.normalize(img, mean=MEAN, std=STD)
        
        if self.test_mode:
            return img
        elif self.train_mode:
            return img, orig_img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
                    
    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')

    train_df = shuffle(train_df, random_state=1234)

    grapheme_words = np.unique(train_df.grapheme.values)
    grapheme_words_dict = {grapheme: i for i, grapheme in enumerate(grapheme_words)}
    train_df['word_label'] = train_df['grapheme'].map(lambda x: grapheme_words_dict[x])

    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [12]:
#train_loader, val_loader = get_train_val_loaders()

In [13]:
#for x in train_loader:
#    print(x)
#    break

# model

In [14]:
#import pretrainedmodels

In [15]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [16]:
from argparse import Namespace
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d

In [17]:
MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

In [18]:
MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d

class BengaliNet4(nn.Module):
    def __init__(self, backbone_name='se_resnext50_32x4d'):
        super(BengaliNet4, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.n_word = 1295
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant + self.n_word
        
        self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained='imagenet')
        self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)
        
        self.num_p2_features = self.backbone.layer2[-1].se_module.fc2.out_channels
        self.num_p3_features = self.backbone.layer3[-1].se_module.fc2.out_channels
        self.p2_head = nn.Conv2d(self.num_p2_features, self.num_p2_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.p3_head = nn.Conv2d(self.num_p3_features, self.num_p3_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn2 = nn.BatchNorm2d(self.num_p2_features * 4)
        self.bn3 = nn.BatchNorm2d(self.num_p3_features * 4)
        self.act2 = Swish()
        self.act3 = Swish()
        
        self.fc_aux1 = nn.Linear(self.num_p3_features * 4, self.num_classes)
        self.fc_aux2 = nn.Linear(self.num_p2_features * 4, self.num_classes)
        
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
        for fc in [self.fc, self.fc_aux1, self.fc_aux2]:
            nn.init.zeros_(fc.bias.data)

        print('init model4')
        
    def features(self, x):
        x = self.backbone.layer0(x); #print(x.size())
        x = self.backbone.layer1(x); #print(x.size())
        x = self.backbone.layer2(x); p2 = x; p2 = self.p2_head(p2); p2 = self.bn2(p2); p2 = self.act2(p2) #print(x.size())
        x = self.backbone.layer3(x); p3 = x; p3 = self.p3_head(p3); p3 = self.bn3(p3); p3 = self.act3(p3) #print(x.size())
        x = self.backbone.layer4(x); #print(x.size())
        return x, p2, p3
        
    def logits(self, x, p2, p3):
        x = self.avg_pool(x)
        #x = F.dropout2d(x, 0.2, self.training)
        x = torch.flatten(x, 1)
        
        p2 = self.avg_pool(p2)
        p2 = torch.flatten(p2, 1)
        
        p3 = self.avg_pool(p3)
        p3 = torch.flatten(p3, 1)
        return self.fc(x), self.fc_aux1(p3), self.fc_aux2(p2)
    
    def forward(self, x):
        x = F.interpolate(x, size=(224,224), mode='bilinear', align_corners=False)
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)
        x = torch.cat([x,x,x], 1)
        #x = self.conv0(x)
        #print(x.size())
        x, p2, p3 = self.features(x)
        x, logits_aux1, logits_aux2 = self.logits(x, p2, p3)

        return x, logits_aux1, logits_aux2

In [19]:
MODEL_DIR = './model4-ckps'
def create_model(args):
    model = BengaliNet4(args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [20]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [21]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [22]:
round(1/9, 6)

0.111111

In [23]:
import numpy as np
import sklearn.metrics
import torch


def calc_metrics(preds0, preds1, preds2, preds3, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2) == len(preds3)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    recall_word = sklearn.metrics.recall_score(preds3, y[:, 3], average='macro')
    
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    metrics['recall_word'] = round(recall_word, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    metrics['acc_word'] = round((preds3 == y[:, 3]).sum() / len(y), 6)    
    
    return metrics

In [24]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    loss3 = F.cross_entropy(outputs[3], y_true[:, 3], reduction='mean')
    
    return loss0 + loss1 + loss2 + loss3 #, loss0.item(), loss1.item(), loss2.item()
    #return loss3

In [25]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2, loss3 = 0., 0., 0., 0.
    preds0, preds1, preds2, preds3 = [], [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs, outputs_aux1, outputs_aux2 = model(x)
            #avg_outputs = torch.mean(torch.stack([outputs, outputs_aux1, outputs_aux2], 0), 0)
            outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            preds3.append(torch.max(outputs[3], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            loss3 += F.cross_entropy(outputs[3], y[:, 3], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    preds3 = torch.cat(preds3, 0).cpu().numpy()
    
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, preds3, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    metrics['loss_word'] = round(loss3 / val_loader.num, 6)
    
    return metrics
            

In [26]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [27]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [28]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [29]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [30]:
np.random.random()

0.26991249636540915

In [31]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam
from gridmask import GridMask

In [32]:
from cvcore.solver import WarmupCyclicalLR
def make_optimizer(model, base_lr=4e-4, weight_decay=0., weight_decay_bias=0., epsilon=1e-3):
    """
    Create optimizer with per-layer learning rate and weight decay.
    """
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = base_lr
        params += [{"params": [value], "lr": lr, "weight_decay": weight_decay_bias if 'bias' in key else weight_decay}]
    
    optimizer = torch.optim.AdamW(params, lr, eps=epsilon)
    return optimizer

In [33]:
best_metrics = 0.

def train(args, model):
    optimizer = make_optimizer(model)
    scheduler = WarmupCyclicalLR(
        "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)
    
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    
    for cycle in range(1, args.num_cycles+1):
        print('CYCLE:', cycle)
        train_cycle(args, model, optimizer, scheduler)
        #args.base_lr = 4e-4
        #args.num_epochs = 100
        #args.warmup_epochs = 10
        scheduler = WarmupCyclicalLR(
            "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)

def train_cycle(args, model, optimizer, lr_scheduler):
    global best_metrics
    best_key = 'recall'
    
    val_metrics = validate(model, val_loader)
    print(val_metrics)

    if val_metrics[best_key] > best_metrics:
        best_metrics = val_metrics[best_key]
    
    model.train()
    train_iter = 0
    grid = GridMask(64, 128, rotate=15, ratio=0.6, mode=1, prob=1.)

    for epoch in range(args.num_epochs):
        grid.set_prob(epoch, args.st_epochs)

        train_loss = 0

        bg = time.time()
        for batch_idx, (img, orig_img, targets) in enumerate(train_loader):
            train_iter += 1
            img, orig_img, targets  = img.cuda(), orig_img.cuda(), targets.cuda()
            batch_size = img.size(0)
            r = np.random.rand()

            if r < 0.3:
                # generate mixed sample
                lam = np.random.beta(args.beta, args.beta)
                rand_index = torch.randperm(img.size()[0]).cuda()
                target_a = targets
                target_b = targets[rand_index]
                bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
                img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
                # adjust lambda to exactly match pixel ratio
                lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
                # compute output
                outputs, outputs_aux1, outputs_aux2 = model(img)
                loss_primary = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
                loss_aux1 = criterion(outputs_aux1, target_a) * lam + criterion(outputs_aux1, target_b) * (1. - lam)
                loss_aux2 = criterion(outputs_aux2, target_a) * lam + criterion(outputs_aux2, target_b) * (1. - lam)
                loss = loss_primary + (loss_aux1 + loss_aux2)*0.5
            elif r > 0.7:
                img = grid(img)
                outputs, outputs_aux1, outputs_aux2 = model(img)
                loss_primary = criterion(outputs, targets)
                loss_aux1 = criterion(outputs_aux1, targets)
                loss_aux2 = criterion(outputs_aux2, targets)
                loss = loss_primary + (loss_aux1 + loss_aux2)*0.5
            else:
                orig_img, targets = mixup(orig_img, targets)
                outputs, outputs_aux1, outputs_aux2 = model(orig_img)
                loss_primary = mixup_criterion(outputs, targets)
                loss_aux1 = mixup_criterion(outputs_aux1, targets)
                loss_aux2 = mixup_criterion(outputs_aux2, targets)
                loss = loss_primary + (loss_aux1 + loss_aux2)*0.5
                #loss = criterion(outputs, targets)

            optimizer.zero_grad()
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            #loss.backward()
            lr_scheduler(optimizer, batch_idx, epoch)
            optimizer.step()            
            
            current_lr = get_lrs(optimizer)

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
                loss.item(), train_loss/(batch_idx+1)), end='')

        if True:#train_iter > 0 and train_iter % args.iter_val == 0:
            val_metrics = validate(model, val_loader)
            print('\nval:', val_metrics)
                
            if val_metrics[best_key] > best_metrics:
                best_metrics = val_metrics[best_key]
                save_model(model, model_file)
                print('###>>>>> saved')
                
            model.train()

In [34]:
args = Namespace()
args.backbone = 'se_resnext50_32x4d'
args.ckp_name = 'model4_se_resnext50_fold0_224.pth'

args.base_lr = 1.5e-4
args.num_epochs = 80
args.warmup_epochs = 5
args.num_cycles = 100
args.batch_size = 640
args.val_batch_size = 1024
args.st_epochs = 10

args.beta = 1.0
args.cutmix_prob = 0.5

In [35]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=0)

(200840, 6)
(200840, 32332)
(160596, 6) (40244, 6)


In [36]:
model, model_file = create_model(args)
#if torch.cuda.device_count() > 1:
#    model = nn.DataParallel(model)
model = model.cuda()


init model4
model file: ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold0_224.pth, exist: True
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold0_224.pth...


In [37]:
#for param in model.backbone.parameters():
#    param.requires_grad = False

In [38]:
#validate(nn.DataParallel(model), val_loader)

In [39]:
train(args, model)

CYCLE: 1
{'recall': 0.997718, 'recall_grapheme': 0.997067, 'recall_vowel': 0.998569, 'recall_consonant': 0.99817, 'recall_word': 0.996409, 'acc_grapheme': 0.996844, 'acc_vowel': 0.998782, 'acc_consonant': 0.998633, 'acc_word': 0.996496, 'loss_grapheme': 0.053724, 'loss_vowel': 0.036053, 'loss_consonant': 0.028162, 'loss_word': 0.034346}
    0 | 0.000030 | 160000/160596 | 12.9655 | 5.9866 ||
val: {'recall': 0.997588, 'recall_grapheme': 0.996898, 'recall_vowel': 0.99843, 'recall_consonant': 0.998124, 'recall_word': 0.996112, 'acc_grapheme': 0.996546, 'acc_vowel': 0.998758, 'acc_consonant': 0.998658, 'acc_word': 0.996223, 'loss_grapheme': 0.050285, 'loss_vowel': 0.03403, 'loss_consonant': 0.026416, 'loss_word': 0.031264}
    1 | 0.000060 | 160000/160596 | 7.8102 | 5.5703 ||
val: {'recall': 0.997329, 'recall_grapheme': 0.996386, 'recall_vowel': 0.99844, 'recall_consonant': 0.998104, 'recall_word': 0.995942, 'acc_grapheme': 0.996397, 'acc_vowel': 0.998758, 'acc_consonant': 0.998559, 'acc_wo

   21 | 0.000124 | 160000/160596 | 1.0600 | 5.8904 |||
val: {'recall': 0.997305, 'recall_grapheme': 0.9967, 'recall_vowel': 0.998158, 'recall_consonant': 0.997662, 'recall_word': 0.994828, 'acc_grapheme': 0.995751, 'acc_vowel': 0.998037, 'acc_consonant': 0.998112, 'acc_word': 0.99503, 'loss_grapheme': 0.052154, 'loss_vowel': 0.032498, 'loss_consonant': 0.022602, 'loss_word': 0.038545}
   22 | 0.000121 | 160000/160596 | 4.1207 | 5.5746 ||
val: {'recall': 0.997356, 'recall_grapheme': 0.996551, 'recall_vowel': 0.998538, 'recall_consonant': 0.997782, 'recall_word': 0.995879, 'acc_grapheme': 0.996372, 'acc_vowel': 0.99841, 'acc_consonant': 0.99831, 'acc_word': 0.99595, 'loss_grapheme': 0.066727, 'loss_vowel': 0.048288, 'loss_consonant': 0.033742, 'loss_word': 0.039384}
   23 | 0.000119 | 160000/160596 | 15.8434 | 5.9452 ||
val: {'recall': 0.996974, 'recall_grapheme': 0.995963, 'recall_vowel': 0.998354, 'recall_consonant': 0.997617, 'recall_word': 0.995656, 'acc_grapheme': 0.995726, 'acc_vow

   42 | 0.000066 | 160000/160596 | 14.6596 | 5.4598 |
val: {'recall': 0.997537, 'recall_grapheme': 0.996912, 'recall_vowel': 0.998267, 'recall_consonant': 0.998056, 'recall_word': 0.996433, 'acc_grapheme': 0.99677, 'acc_vowel': 0.998484, 'acc_consonant': 0.998559, 'acc_word': 0.996546, 'loss_grapheme': 0.067728, 'loss_vowel': 0.049037, 'loss_consonant': 0.036609, 'loss_word': 0.038031}
   43 | 0.000063 | 160000/160596 | 1.0342 | 5.4772 ||
val: {'recall': 0.997367, 'recall_grapheme': 0.996523, 'recall_vowel': 0.998446, 'recall_consonant': 0.997977, 'recall_word': 0.996105, 'acc_grapheme': 0.996571, 'acc_vowel': 0.998459, 'acc_consonant': 0.998459, 'acc_word': 0.996198, 'loss_grapheme': 0.036174, 'loss_vowel': 0.025477, 'loss_consonant': 0.021143, 'loss_word': 0.023965}
   44 | 0.000060 | 160000/160596 | 0.8816 | 5.0771 |||
val: {'recall': 0.997472, 'recall_grapheme': 0.996858, 'recall_vowel': 0.998425, 'recall_consonant': 0.997746, 'recall_word': 0.996292, 'acc_grapheme': 0.996968, 'acc

   63 | 0.000014 | 160000/160596 | 7.3366 | 5.3388 ||
val: {'recall': 0.997169, 'recall_grapheme': 0.996223, 'recall_vowel': 0.998481, 'recall_consonant': 0.997748, 'recall_word': 0.996268, 'acc_grapheme': 0.996521, 'acc_vowel': 0.998608, 'acc_consonant': 0.998459, 'acc_word': 0.996397, 'loss_grapheme': 0.023479, 'loss_vowel': 0.016169, 'loss_consonant': 0.014648, 'loss_word': 0.017513}
   64 | 0.000013 | 160000/160596 | 0.8939 | 5.4571 ||
val: {'recall': 0.99742, 'recall_grapheme': 0.996568, 'recall_vowel': 0.998604, 'recall_consonant': 0.997939, 'recall_word': 0.996806, 'acc_grapheme': 0.997018, 'acc_vowel': 0.998733, 'acc_consonant': 0.998608, 'acc_word': 0.996919, 'loss_grapheme': 0.023622, 'loss_vowel': 0.017236, 'loss_consonant': 0.014628, 'loss_word': 0.017275}
   65 | 0.000011 | 160000/160596 | 0.8816 | 5.4897 ||
val: {'recall': 0.997477, 'recall_grapheme': 0.996666, 'recall_vowel': 0.998701, 'recall_consonant': 0.997876, 'recall_word': 0.996587, 'acc_grapheme': 0.996795, 'acc_

    4 | 0.000148 | 160000/160596 | 0.9098 | 5.8904 |||
val: {'recall': 0.997415, 'recall_grapheme': 0.996772, 'recall_vowel': 0.998159, 'recall_consonant': 0.997956, 'recall_word': 0.99652, 'acc_grapheme': 0.997018, 'acc_vowel': 0.998484, 'acc_consonant': 0.998708, 'acc_word': 0.996645, 'loss_grapheme': 0.022326, 'loss_vowel': 0.014776, 'loss_consonant': 0.013098, 'loss_word': 0.018249}
    5 | 0.000148 | 160000/160596 | 10.9768 | 6.1308 |
val: {'recall': 0.996735, 'recall_grapheme': 0.996215, 'recall_vowel': 0.998453, 'recall_consonant': 0.996058, 'recall_word': 0.995799, 'acc_grapheme': 0.996248, 'acc_vowel': 0.998435, 'acc_consonant': 0.99836, 'acc_word': 0.995801, 'loss_grapheme': 0.048717, 'loss_vowel': 0.034124, 'loss_consonant': 0.026089, 'loss_word': 0.031528}
    6 | 0.000147 | 160000/160596 | 12.8278 | 5.2024 |
val: {'recall': 0.996651, 'recall_grapheme': 0.996218, 'recall_vowel': 0.998251, 'recall_consonant': 0.995916, 'recall_word': 0.995311, 'acc_grapheme': 0.9959, 'acc_vo

   25 | 0.000114 | 160000/160596 | 6.0655 | 5.7730 |||
val: {'recall': 0.997274, 'recall_grapheme': 0.996353, 'recall_vowel': 0.998428, 'recall_consonant': 0.99796, 'recall_word': 0.996191, 'acc_grapheme': 0.996546, 'acc_vowel': 0.998484, 'acc_consonant': 0.998534, 'acc_word': 0.996273, 'loss_grapheme': 0.060595, 'loss_vowel': 0.041571, 'loss_consonant': 0.031549, 'loss_word': 0.037808}
   26 | 0.000112 | 160000/160596 | 0.9895 | 5.4671 ||
val: {'recall': 0.997405, 'recall_grapheme': 0.996481, 'recall_vowel': 0.998521, 'recall_consonant': 0.998138, 'recall_word': 0.996579, 'acc_grapheme': 0.996645, 'acc_vowel': 0.998608, 'acc_consonant': 0.998782, 'acc_word': 0.99672, 'loss_grapheme': 0.028161, 'loss_vowel': 0.018129, 'loss_consonant': 0.014739, 'loss_word': 0.019408}
   27 | 0.000109 | 160000/160596 | 1.0163 | 5.6899 ||
val: {'recall': 0.996831, 'recall_grapheme': 0.995755, 'recall_vowel': 0.998274, 'recall_consonant': 0.997541, 'recall_word': 0.995238, 'acc_grapheme': 0.995577, 'acc_

   46 | 0.000055 | 160000/160596 | 14.4797 | 5.6864 |
val: {'recall': 0.997355, 'recall_grapheme': 0.996427, 'recall_vowel': 0.998687, 'recall_consonant': 0.997881, 'recall_word': 0.996375, 'acc_grapheme': 0.996546, 'acc_vowel': 0.998683, 'acc_consonant': 0.998534, 'acc_word': 0.996472, 'loss_grapheme': 0.0414, 'loss_vowel': 0.029941, 'loss_consonant': 0.023989, 'loss_word': 0.027685}
   47 | 0.000052 | 160000/160596 | 3.1590 | 5.2538 ||
val: {'recall': 0.997333, 'recall_grapheme': 0.996835, 'recall_vowel': 0.998199, 'recall_consonant': 0.997462, 'recall_word': 0.995172, 'acc_grapheme': 0.996173, 'acc_vowel': 0.998435, 'acc_consonant': 0.998285, 'acc_word': 0.995279, 'loss_grapheme': 0.063717, 'loss_vowel': 0.038872, 'loss_consonant': 0.032387, 'loss_word': 0.041782}
   48 | 0.000049 | 160000/160596 | 4.0468 | 5.2289 ||
val: {'recall': 0.997233, 'recall_grapheme': 0.996417, 'recall_vowel': 0.998308, 'recall_consonant': 0.99779, 'recall_word': 0.996084, 'acc_grapheme': 0.996496, 'acc_vo

   67 | 0.000008 | 160000/160596 | 0.7282 | 5.1206 ||
val: {'recall': 0.997645, 'recall_grapheme': 0.997038, 'recall_vowel': 0.998539, 'recall_consonant': 0.997963, 'recall_word': 0.996738, 'acc_grapheme': 0.997118, 'acc_vowel': 0.998733, 'acc_consonant': 0.998683, 'acc_word': 0.996819, 'loss_grapheme': 0.020915, 'loss_vowel': 0.013994, 'loss_consonant': 0.012406, 'loss_word': 0.016366}
   68 | 0.000007 | 160000/160596 | 13.0432 | 4.9529 |
val: {'recall': 0.997561, 'recall_grapheme': 0.996933, 'recall_vowel': 0.998461, 'recall_consonant': 0.997918, 'recall_word': 0.996649, 'acc_grapheme': 0.996968, 'acc_vowel': 0.998733, 'acc_consonant': 0.998608, 'acc_word': 0.996745, 'loss_grapheme': 0.050717, 'loss_vowel': 0.034988, 'loss_consonant': 0.028759, 'loss_word': 0.029343}
   69 | 0.000006 | 160000/160596 | 5.7878 | 5.8231 ||
val: {'recall': 0.997109, 'recall_grapheme': 0.996105, 'recall_vowel': 0.998275, 'recall_consonant': 0.997949, 'recall_word': 0.996195, 'acc_grapheme': 0.996571, 'acc

    8 | 0.000145 | 160000/160596 | 3.5107 | 5.5183 |||
val: {'recall': 0.996481, 'recall_grapheme': 0.99518, 'recall_vowel': 0.99771, 'recall_consonant': 0.997855, 'recall_word': 0.994826, 'acc_grapheme': 0.995676, 'acc_vowel': 0.998012, 'acc_consonant': 0.998161, 'acc_word': 0.994906, 'loss_grapheme': 0.032154, 'loss_vowel': 0.0196, 'loss_consonant': 0.016373, 'loss_word': 0.026567}
    9 | 0.000144 | 160000/160596 | 0.8367 | 5.2435 ||
val: {'recall': 0.997185, 'recall_grapheme': 0.996385, 'recall_vowel': 0.998137, 'recall_consonant': 0.997831, 'recall_word': 0.995435, 'acc_grapheme': 0.995975, 'acc_vowel': 0.998385, 'acc_consonant': 0.998335, 'acc_word': 0.995527, 'loss_grapheme': 0.027364, 'loss_vowel': 0.015229, 'loss_consonant': 0.01566, 'loss_word': 0.022573}
   10 | 0.000143 | 160000/160596 | 6.3547 | 5.3057 ||
val: {'recall': 0.996551, 'recall_grapheme': 0.995442, 'recall_vowel': 0.99835, 'recall_consonant': 0.996972, 'recall_word': 0.994711, 'acc_grapheme': 0.995378, 'acc_vowe

KeyboardInterrupt: 

In [50]:
train(args, model)

CYCLE: 1
{'recall': 0.997498, 'recall_grapheme': 0.996893, 'recall_vowel': 0.99822, 'recall_consonant': 0.997987, 'recall_word': 0.996026, 'acc_grapheme': 0.996372, 'acc_vowel': 0.998633, 'acc_consonant': 0.998534, 'acc_word': 0.996099, 'loss_grapheme': 0.132114, 'loss_vowel': 0.083359, 'loss_consonant': 0.058515, 'loss_word': 0.084024}
    0 | 0.000030 | 160000/160596 | 12.7884 | 6.0407 ||
val: {'recall': 0.997627, 'recall_grapheme': 0.997045, 'recall_vowel': 0.998395, 'recall_consonant': 0.998022, 'recall_word': 0.996112, 'acc_grapheme': 0.996546, 'acc_vowel': 0.998658, 'acc_consonant': 0.998584, 'acc_word': 0.996223, 'loss_grapheme': 0.076319, 'loss_vowel': 0.051092, 'loss_consonant': 0.037538, 'loss_word': 0.048984}
###>>>>> saved
    1 | 0.000060 | 160000/160596 | 15.8839 | 6.5456 |
val: {'recall': 0.997121, 'recall_grapheme': 0.996174, 'recall_vowel': 0.998147, 'recall_consonant': 0.997988, 'recall_word': 0.995936, 'acc_grapheme': 0.996397, 'acc_vowel': 0.998484, 'acc_consonant':

   21 | 0.000124 | 160000/160596 | 1.5590 | 7.1577 ||
val: {'recall': 0.997323, 'recall_grapheme': 0.996731, 'recall_vowel': 0.997769, 'recall_consonant': 0.99806, 'recall_word': 0.995886, 'acc_grapheme': 0.996447, 'acc_vowel': 0.998385, 'acc_consonant': 0.998534, 'acc_word': 0.995975, 'loss_grapheme': 0.054767, 'loss_vowel': 0.033527, 'loss_consonant': 0.029463, 'loss_word': 0.03649}
   22 | 0.000121 | 160000/160596 | 1.5189 | 6.6509 |||
val: {'recall': 0.997124, 'recall_grapheme': 0.996714, 'recall_vowel': 0.997526, 'recall_consonant': 0.997544, 'recall_word': 0.995537, 'acc_grapheme': 0.996074, 'acc_vowel': 0.998136, 'acc_consonant': 0.998584, 'acc_word': 0.995627, 'loss_grapheme': 0.040991, 'loss_vowel': 0.026306, 'loss_consonant': 0.020716, 'loss_word': 0.028771}
   23 | 0.000119 | 160000/160596 | 6.9661 | 6.7586 ||
val: {'recall': 0.997022, 'recall_grapheme': 0.996422, 'recall_vowel': 0.997497, 'recall_consonant': 0.997747, 'recall_word': 0.995296, 'acc_grapheme': 0.99585, 'acc_v

   42 | 0.000066 | 160000/160596 | 3.9906 | 6.5162 |||
val: {'recall': 0.997145, 'recall_grapheme': 0.996552, 'recall_vowel': 0.998256, 'recall_consonant': 0.997219, 'recall_word': 0.995237, 'acc_grapheme': 0.996099, 'acc_vowel': 0.998435, 'acc_consonant': 0.998211, 'acc_word': 0.995378, 'loss_grapheme': 0.076635, 'loss_vowel': 0.047777, 'loss_consonant': 0.037093, 'loss_word': 0.049307}
   43 | 0.000063 | 160000/160596 | 1.1832 | 6.1932 |||
val: {'recall': 0.997583, 'recall_grapheme': 0.9968, 'recall_vowel': 0.998603, 'recall_consonant': 0.998127, 'recall_word': 0.996422, 'acc_grapheme': 0.99672, 'acc_vowel': 0.998683, 'acc_consonant': 0.998683, 'acc_word': 0.996521, 'loss_grapheme': 0.026095, 'loss_vowel': 0.017138, 'loss_consonant': 0.013987, 'loss_word': 0.019375}
   44 | 0.000060 | 160000/160596 | 4.8584 | 6.2760 ||
val: {'recall': 0.997324, 'recall_grapheme': 0.996856, 'recall_vowel': 0.998219, 'recall_consonant': 0.997364, 'recall_word': 0.996142, 'acc_grapheme': 0.996571, 'acc_

   64 | 0.000013 | 160000/160596 | 0.9669 | 5.9342 ||
val: {'recall': 0.997714, 'recall_grapheme': 0.997073, 'recall_vowel': 0.998622, 'recall_consonant': 0.998086, 'recall_word': 0.996663, 'acc_grapheme': 0.996869, 'acc_vowel': 0.998832, 'acc_consonant': 0.998758, 'acc_word': 0.996745, 'loss_grapheme': 0.038651, 'loss_vowel': 0.026906, 'loss_consonant': 0.020957, 'loss_word': 0.025053}
###>>>>> saved
   65 | 0.000011 | 160000/160596 | 3.5143 | 6.1874 ||
val: {'recall': 0.997392, 'recall_grapheme': 0.996659, 'recall_vowel': 0.998303, 'recall_consonant': 0.997947, 'recall_word': 0.996059, 'acc_grapheme': 0.996273, 'acc_vowel': 0.998708, 'acc_consonant': 0.998509, 'acc_word': 0.996173, 'loss_grapheme': 0.072503, 'loss_vowel': 0.047282, 'loss_consonant': 0.037336, 'loss_word': 0.04224}
   66 | 0.000010 | 160000/160596 | 7.6927 | 6.2418 ||
val: {'recall': 0.997718, 'recall_grapheme': 0.997067, 'recall_vowel': 0.998569, 'recall_consonant': 0.99817, 'recall_word': 0.996409, 'acc_grapheme': 0

KeyboardInterrupt: 

In [38]:
train(args, model)

CYCLE: 1
{'recall': 0.994744, 'recall_grapheme': 0.992397, 'recall_vowel': 0.997031, 'recall_consonant': 0.99715, 'recall_word': 0.988796, 'acc_grapheme': 0.992421, 'acc_vowel': 0.997465, 'acc_consonant': 0.997515, 'acc_word': 0.989141, 'loss_grapheme': 0.051274, 'loss_vowel': 0.044025, 'loss_consonant': 0.036861, 'loss_word': 0.707041}


  'recall', 'true', average, warn_for)


    0 | 0.000030 | 160000/160596 | 13.5191 | 16.2231 |
val: {'recall': 0.994267, 'recall_grapheme': 0.991667, 'recall_vowel': 0.997044, 'recall_consonant': 0.996688, 'recall_word': 0.989588, 'acc_grapheme': 0.991924, 'acc_vowel': 0.997341, 'acc_consonant': 0.997142, 'acc_word': 0.989315, 'loss_grapheme': 0.060404, 'loss_vowel': 0.049112, 'loss_consonant': 0.044607, 'loss_word': 0.691783}
    1 | 0.000060 | 160000/160596 | 14.7654 | 15.2369 |
val: {'recall': 0.994122, 'recall_grapheme': 0.991519, 'recall_vowel': 0.99668, 'recall_consonant': 0.996773, 'recall_word': 0.99096, 'acc_grapheme': 0.992396, 'acc_vowel': 0.997043, 'acc_consonant': 0.996944, 'acc_word': 0.990806, 'loss_grapheme': 0.069435, 'loss_vowel': 0.056182, 'loss_consonant': 0.051753, 'loss_word': 0.507367}
    2 | 0.000090 | 160000/160596 | 15.7133 | 14.6691 |
val: {'recall': 0.993512, 'recall_grapheme': 0.990841, 'recall_vowel': 0.996079, 'recall_consonant': 0.996286, 'recall_word': 0.989377, 'acc_grapheme': 0.99103, 'acc

   21 | 0.000124 | 160000/160596 | 16.9718 | 10.6579 |
val: {'recall': 0.993567, 'recall_grapheme': 0.990838, 'recall_vowel': 0.997542, 'recall_consonant': 0.995051, 'recall_word': 0.992189, 'acc_grapheme': 0.991974, 'acc_vowel': 0.997292, 'acc_consonant': 0.997018, 'acc_word': 0.992272, 'loss_grapheme': 0.218633, 'loss_vowel': 0.152869, 'loss_consonant': 0.099685, 'loss_word': 0.141267}
   22 | 0.000121 | 160000/160596 | 5.0350 | 10.8975 ||
val: {'recall': 0.994751, 'recall_grapheme': 0.99296, 'recall_vowel': 0.997277, 'recall_consonant': 0.995806, 'recall_word': 0.993694, 'acc_grapheme': 0.994111, 'acc_vowel': 0.997863, 'acc_consonant': 0.99754, 'acc_word': 0.993788, 'loss_grapheme': 0.245615, 'loss_vowel': 0.144242, 'loss_consonant': 0.097046, 'loss_word': 0.178591}
   23 | 0.000119 | 160000/160596 | 9.0374 | 10.9487 ||
val: {'recall': 0.993748, 'recall_grapheme': 0.991392, 'recall_vowel': 0.996975, 'recall_consonant': 0.995232, 'recall_word': 0.992373, 'acc_grapheme': 0.992869, 'ac

###>>>>> saved
   42 | 0.000066 | 160000/160596 | 3.6289 | 10.0790 ||
val: {'recall': 0.995378, 'recall_grapheme': 0.993991, 'recall_vowel': 0.99773, 'recall_consonant': 0.995801, 'recall_word': 0.994396, 'acc_grapheme': 0.994359, 'acc_vowel': 0.998161, 'acc_consonant': 0.997913, 'acc_word': 0.994434, 'loss_grapheme': 0.201855, 'loss_vowel': 0.120582, 'loss_consonant': 0.08279, 'loss_word': 0.130497}
   43 | 0.000063 | 160000/160596 | 3.2498 | 10.3223 ||
val: {'recall': 0.995013, 'recall_grapheme': 0.993752, 'recall_vowel': 0.997033, 'recall_consonant': 0.995512, 'recall_word': 0.993677, 'acc_grapheme': 0.994036, 'acc_vowel': 0.997565, 'acc_consonant': 0.997639, 'acc_word': 0.993763, 'loss_grapheme': 0.219137, 'loss_vowel': 0.132844, 'loss_consonant': 0.091363, 'loss_word': 0.139311}
   44 | 0.000060 | 160000/160596 | 17.1036 | 10.1444 |
val: {'recall': 0.995154, 'recall_grapheme': 0.993548, 'recall_vowel': 0.997715, 'recall_consonant': 0.995803, 'recall_word': 0.993558, 'acc_grapheme'

   63 | 0.000014 | 160000/160596 | 6.7829 | 10.0811 ||
val: {'recall': 0.996168, 'recall_grapheme': 0.994447, 'recall_vowel': 0.998185, 'recall_consonant': 0.997596, 'recall_word': 0.994998, 'acc_grapheme': 0.994906, 'acc_vowel': 0.998161, 'acc_consonant': 0.998012, 'acc_word': 0.99508, 'loss_grapheme': 0.210053, 'loss_vowel': 0.127437, 'loss_consonant': 0.089812, 'loss_word': 0.126097}
   64 | 0.000013 | 160000/160596 | 15.3425 | 9.8944 ||
val: {'recall': 0.995717, 'recall_grapheme': 0.99376, 'recall_vowel': 0.997936, 'recall_consonant': 0.997409, 'recall_word': 0.994176, 'acc_grapheme': 0.994533, 'acc_vowel': 0.997962, 'acc_consonant': 0.997689, 'acc_word': 0.99421, 'loss_grapheme': 0.2582, 'loss_vowel': 0.135459, 'loss_consonant': 0.102781, 'loss_word': 0.17121}
   65 | 0.000011 | 160000/160596 | 13.5563 | 10.0860 |
val: {'recall': 0.995152, 'recall_grapheme': 0.993505, 'recall_vowel': 0.997803, 'recall_consonant': 0.995795, 'recall_word': 0.994141, 'acc_grapheme': 0.994235, 'acc_vo

    3 | 0.000119 | 160000/160596 | 8.6064 | 9.8559 |||
val: {'recall': 0.99537, 'recall_grapheme': 0.993429, 'recall_vowel': 0.997194, 'recall_consonant': 0.997426, 'recall_word': 0.993629, 'acc_grapheme': 0.993912, 'acc_vowel': 0.997764, 'acc_consonant': 0.997838, 'acc_word': 0.993689, 'loss_grapheme': 0.19278, 'loss_vowel': 0.127564, 'loss_consonant': 0.092346, 'loss_word': 0.11302}
    4 | 0.000148 | 160000/160596 | 3.8305 | 9.9229 |||
val: {'recall': 0.994928, 'recall_grapheme': 0.993169, 'recall_vowel': 0.997291, 'recall_consonant': 0.996082, 'recall_word': 0.993215, 'acc_grapheme': 0.99349, 'acc_vowel': 0.997739, 'acc_consonant': 0.99754, 'acc_word': 0.993241, 'loss_grapheme': 0.168983, 'loss_vowel': 0.110529, 'loss_consonant': 0.074026, 'loss_word': 0.115751}
    5 | 0.000148 | 160000/160596 | 6.2714 | 9.6942 |||
val: {'recall': 0.995385, 'recall_grapheme': 0.993339, 'recall_vowel': 0.997762, 'recall_consonant': 0.997102, 'recall_word': 0.993981, 'acc_grapheme': 0.993639, 'acc_v

   24 | 0.000117 | 160000/160596 | 10.1707 | 9.4029 ||
val: {'recall': 0.995231, 'recall_grapheme': 0.993279, 'recall_vowel': 0.997045, 'recall_consonant': 0.99732, 'recall_word': 0.993024, 'acc_grapheme': 0.993862, 'acc_vowel': 0.997267, 'acc_consonant': 0.997838, 'acc_word': 0.992993, 'loss_grapheme': 0.243026, 'loss_vowel': 0.143282, 'loss_consonant': 0.107101, 'loss_word': 0.146342}
   25 | 0.000114 | 160000/160596 | 3.6174 | 9.6174 |||
val: {'recall': 0.99578, 'recall_grapheme': 0.993985, 'recall_vowel': 0.997721, 'recall_consonant': 0.99743, 'recall_word': 0.993852, 'acc_grapheme': 0.994533, 'acc_vowel': 0.997714, 'acc_consonant': 0.998062, 'acc_word': 0.993862, 'loss_grapheme': 0.232283, 'loss_vowel': 0.128963, 'loss_consonant': 0.092051, 'loss_word': 0.143839}
   26 | 0.000112 | 160000/160596 | 15.3239 | 9.0249 ||
val: {'recall': 0.996352, 'recall_grapheme': 0.994872, 'recall_vowel': 0.997761, 'recall_consonant': 0.997902, 'recall_word': 0.994711, 'acc_grapheme': 0.995005, 'acc

   45 | 0.000058 | 160000/160596 | 15.3887 | 9.1880 ||
val: {'recall': 0.996287, 'recall_grapheme': 0.994962, 'recall_vowel': 0.99743, 'recall_consonant': 0.997794, 'recall_word': 0.994464, 'acc_grapheme': 0.994832, 'acc_vowel': 0.998037, 'acc_consonant': 0.998062, 'acc_word': 0.994533, 'loss_grapheme': 0.190746, 'loss_vowel': 0.107423, 'loss_consonant': 0.07889, 'loss_word': 0.11536}
   46 | 0.000055 | 160000/160596 | 7.3814 | 8.9218 |||
val: {'recall': 0.996735, 'recall_grapheme': 0.995576, 'recall_vowel': 0.997987, 'recall_consonant': 0.997801, 'recall_word': 0.99519, 'acc_grapheme': 0.995478, 'acc_vowel': 0.99836, 'acc_consonant': 0.998087, 'acc_word': 0.995204, 'loss_grapheme': 0.20867, 'loss_vowel': 0.108323, 'loss_consonant': 0.084947, 'loss_word': 0.119926}
###>>>>> saved
   47 | 0.000052 | 160000/160596 | 15.9650 | 9.0853 ||
val: {'recall': 0.99582, 'recall_grapheme': 0.994723, 'recall_vowel': 0.997219, 'recall_consonant': 0.996615, 'recall_word': 0.994525, 'acc_grapheme': 0.9

###>>>>> saved
   66 | 0.000010 | 160000/160596 | 6.5751 | 8.8596 |||
val: {'recall': 0.996633, 'recall_grapheme': 0.995698, 'recall_vowel': 0.997825, 'recall_consonant': 0.99731, 'recall_word': 0.995198, 'acc_grapheme': 0.995726, 'acc_vowel': 0.998186, 'acc_consonant': 0.998261, 'acc_word': 0.995254, 'loss_grapheme': 0.152551, 'loss_vowel': 0.088179, 'loss_consonant': 0.064188, 'loss_word': 0.09608}
   67 | 0.000008 | 160000/160596 | 6.1815 | 8.8011 ||
val: {'recall': 0.996039, 'recall_grapheme': 0.994652, 'recall_vowel': 0.997756, 'recall_consonant': 0.997094, 'recall_word': 0.99481, 'acc_grapheme': 0.994881, 'acc_vowel': 0.998062, 'acc_consonant': 0.997987, 'acc_word': 0.994881, 'loss_grapheme': 0.229378, 'loss_vowel': 0.143013, 'loss_consonant': 0.110547, 'loss_word': 0.120738}
   68 | 0.000007 | 160000/160596 | 8.3275 | 8.5445 ||
val: {'recall': 0.99654, 'recall_grapheme': 0.995434, 'recall_vowel': 0.998036, 'recall_consonant': 0.997253, 'recall_word': 0.995196, 'acc_grapheme': 0.

    6 | 0.000147 | 160000/160596 | 14.9069 | 9.0419 ||
val: {'recall': 0.995851, 'recall_grapheme': 0.994122, 'recall_vowel': 0.997899, 'recall_consonant': 0.997262, 'recall_word': 0.994092, 'acc_grapheme': 0.994235, 'acc_vowel': 0.997888, 'acc_consonant': 0.997739, 'acc_word': 0.994161, 'loss_grapheme': 0.191561, 'loss_vowel': 0.105633, 'loss_consonant': 0.081668, 'loss_word': 0.112553}
    7 | 0.000146 | 160000/160596 | 4.9063 | 8.9092 |||
val: {'recall': 0.995831, 'recall_grapheme': 0.994298, 'recall_vowel': 0.997388, 'recall_consonant': 0.997342, 'recall_word': 0.994379, 'acc_grapheme': 0.994707, 'acc_vowel': 0.998012, 'acc_consonant': 0.998112, 'acc_word': 0.994409, 'loss_grapheme': 0.144679, 'loss_vowel': 0.084012, 'loss_consonant': 0.062502, 'loss_word': 0.096612}
    8 | 0.000145 | 160000/160596 | 14.3975 | 9.0968 ||
val: {'recall': 0.99633, 'recall_grapheme': 0.994966, 'recall_vowel': 0.997788, 'recall_consonant': 0.997601, 'recall_word': 0.994087, 'acc_grapheme': 0.994881, 'a

   27 | 0.000109 | 160000/160596 | 12.6458 | 8.4536 ||
val: {'recall': 0.996527, 'recall_grapheme': 0.995456, 'recall_vowel': 0.997616, 'recall_consonant': 0.997582, 'recall_word': 0.995389, 'acc_grapheme': 0.995527, 'acc_vowel': 0.998236, 'acc_consonant': 0.998261, 'acc_word': 0.995453, 'loss_grapheme': 0.216646, 'loss_vowel': 0.125456, 'loss_consonant': 0.084566, 'loss_word': 0.136005}
   28 | 0.000106 | 160000/160596 | 13.8348 | 8.9147 |
val: {'recall': 0.996139, 'recall_grapheme': 0.99484, 'recall_vowel': 0.997291, 'recall_consonant': 0.997584, 'recall_word': 0.994901, 'acc_grapheme': 0.994981, 'acc_vowel': 0.997987, 'acc_consonant': 0.998112, 'acc_word': 0.994981, 'loss_grapheme': 0.221499, 'loss_vowel': 0.133914, 'loss_consonant': 0.091652, 'loss_word': 0.127087}
   29 | 0.000104 | 160000/160596 | 5.0358 | 8.4889 |||
val: {'recall': 0.995676, 'recall_grapheme': 0.994342, 'recall_vowel': 0.997737, 'recall_consonant': 0.996281, 'recall_word': 0.994328, 'acc_grapheme': 0.994856, 'ac

   48 | 0.000049 | 160000/160596 | 5.4270 | 8.4843 |||
val: {'recall': 0.996929, 'recall_grapheme': 0.996411, 'recall_vowel': 0.997428, 'recall_consonant': 0.997468, 'recall_word': 0.995234, 'acc_grapheme': 0.995925, 'acc_vowel': 0.998012, 'acc_consonant': 0.998335, 'acc_word': 0.995279, 'loss_grapheme': 0.202035, 'loss_vowel': 0.113786, 'loss_consonant': 0.090212, 'loss_word': 0.127162}
   49 | 0.000046 | 160000/160596 | 3.7988 | 7.9082 ||
val: {'recall': 0.997214, 'recall_grapheme': 0.99663, 'recall_vowel': 0.997785, 'recall_consonant': 0.997813, 'recall_word': 0.995588, 'acc_grapheme': 0.996049, 'acc_vowel': 0.998459, 'acc_consonant': 0.998385, 'acc_word': 0.995676, 'loss_grapheme': 0.16191, 'loss_vowel': 0.091391, 'loss_consonant': 0.066615, 'loss_word': 0.095959}
###>>>>> saved
   50 | 0.000044 | 160000/160596 | 13.4983 | 8.2749 |
val: {'recall': 0.997258, 'recall_grapheme': 0.996423, 'recall_vowel': 0.998467, 'recall_consonant': 0.99772, 'recall_word': 0.995466, 'acc_grapheme': 0

   69 | 0.000006 | 160000/160596 | 3.7578 | 8.0367 |||
val: {'recall': 0.997498, 'recall_grapheme': 0.996893, 'recall_vowel': 0.99822, 'recall_consonant': 0.997987, 'recall_word': 0.996026, 'acc_grapheme': 0.996372, 'acc_vowel': 0.998633, 'acc_consonant': 0.998534, 'acc_word': 0.996099, 'loss_grapheme': 0.132114, 'loss_vowel': 0.083359, 'loss_consonant': 0.058515, 'loss_word': 0.084024}
###>>>>> saved
   70 | 0.000005 | 160000/160596 | 11.5364 | 8.4926 ||
val: {'recall': 0.99712, 'recall_grapheme': 0.996228, 'recall_vowel': 0.998171, 'recall_consonant': 0.997855, 'recall_word': 0.996019, 'acc_grapheme': 0.996148, 'acc_vowel': 0.998658, 'acc_consonant': 0.998435, 'acc_word': 0.996148, 'loss_grapheme': 0.153361, 'loss_vowel': 0.090587, 'loss_consonant': 0.070349, 'loss_word': 0.092097}
   71 | 0.000004 | 160000/160596 | 8.9344 | 8.0685 |||
val: {'recall': 0.997023, 'recall_grapheme': 0.996197, 'recall_vowel': 0.997901, 'recall_consonant': 0.997796, 'recall_word': 0.995719, 'acc_grapheme'

   10 | 0.000143 | 160000/160596 | 5.0539 | 8.6628 ||
val: {'recall': 0.99605, 'recall_grapheme': 0.994534, 'recall_vowel': 0.997466, 'recall_consonant': 0.997664, 'recall_word': 0.994315, 'acc_grapheme': 0.994956, 'acc_vowel': 0.997838, 'acc_consonant': 0.998161, 'acc_word': 0.994359, 'loss_grapheme': 0.167985, 'loss_vowel': 0.100405, 'loss_consonant': 0.074104, 'loss_word': 0.104733}
   11 | 0.000142 | 160000/160596 | 13.9985 | 7.9454 |
val: {'recall': 0.996102, 'recall_grapheme': 0.994722, 'recall_vowel': 0.997261, 'recall_consonant': 0.997701, 'recall_word': 0.994891, 'acc_grapheme': 0.995005, 'acc_vowel': 0.998012, 'acc_consonant': 0.998211, 'acc_word': 0.995005, 'loss_grapheme': 0.16785, 'loss_vowel': 0.10523, 'loss_consonant': 0.068006, 'loss_word': 0.113005}
   12 | 0.000140 | 160000/160596 | 2.3755 | 8.3402 ||
val: {'recall': 0.996366, 'recall_grapheme': 0.995465, 'recall_vowel': 0.997259, 'recall_consonant': 0.997277, 'recall_word': 0.994439, 'acc_grapheme': 0.995105, 'acc_vo

KeyboardInterrupt: 

In [None]:
#save_model(model, model_file)

In [None]:
del model