In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet
from cvcore.data.auto_augment import RandAugment
from PIL import Image

In [2]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [3]:
#!ls /home/chec/data/bengali

In [4]:
DATA_DIR = '/home/chec/data/bengali'

In [5]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [7]:
HEIGHT = 137
WIDTH = 236

In [8]:
import albumentations as albu
'''
def get_train_augs(p=1.):
    return albu.Compose([
        #albu.HorizontalFlip(.5),
        albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=0.5 ),
        albu.Blur(blur_limit=3, p=0.3),
        albu.OpticalDistortion(p=0.3),
        albu.GaussNoise(p=0.3)
        #albu.GridDistortion(p=.33),
        #albu.HueSaturationValue(p=.33) # not for grey scale
    ], p=p)
'''
def get_train_augs():
    return RandAugment(n=2, m=27)

In [9]:
#plt.imshow(x)

In [10]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            #img = augs(image=img)['image']
            img = np.asarray(augs(Image.fromarray(img)))
        
        img = np.expand_dims(img, axis=-1)
        #print('###', img.shape)
        img = np.concatenate([img, img, img], 2)
        #print('>>>', img.shape)
        
        # taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
        MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
        STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]
        
        img = transforms.functional.to_tensor(img)
        img = transforms.functional.normalize(img, mean=MEAN, std=STD)
        
        if self.test_mode:
            return img
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic])

    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
    train_df = shuffle(train_df, random_state=1234)
    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [11]:
#train_loader, val_loader = get_train_val_loaders(dev_mode=True)

# model

In [12]:
#import pretrainedmodels

In [13]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [14]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [15]:
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained=False).cuda()


In [16]:
#model.features(torch.randn((2, 3, 137, 236)).cuda()).size()

In [17]:
#model.last_linear.in_features

In [18]:
class BengaliNet(nn.Module):
    def __init__(self, backbone_name):
        super(BengaliNet, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant
        
        #self.conv0 = nn.Conv2d(1, 3, kernel_size=1, stride=1, padding=0)
        
        if self.backbone_name.startswith('efficient'):
            self.backbone = EfficientNet.from_pretrained(self.backbone_name)
            self.fc = nn.Linear(self.backbone._fc.in_features, self.num_classes)
        else:
            self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained='imagenet')
            self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
        #self.fix_input_layer()
        
    def fix_input_layer(self):
        if self.backbone_name in ['se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnet50', 'senet154', 'se_resnet152', 'nasnetmobile', 'mobilenet', 'nasnetalarge']:
            #self.backbone = eval(backbone_name)()
            #print(self.backbone.layer0.conv1)
            w = self.backbone.layer0.conv1.weight.data
            self.backbone.layer0.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
            #self.backbone.layer0.conv1.weight = torch.nn.Parameter(torch.cat((w, w[:, 2, :, :].unsqueeze(1)), dim=1))
            self.backbone.layer0.conv1.weight = torch.nn.Parameter(w[:, 0, :, :].unsqueeze(1))
        
    def logits(self, x):
        x = self.avg_pool(x)
        #x = F.dropout2d(x, 0.2, self.training)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
    def forward(self, x):
        #x = self.conv0(x)
        #print(x.size())
        if self.backbone_name.startswith('efficient'):
            x = self.backbone.extract_features(x)
        else:
            x = self.backbone.features(x)
        x = self.logits(x)

        return x

In [19]:
MODEL_DIR = './models'
def create_model(args):
    model = BengaliNet(backbone_name=args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if args.predict and (not os.path.exists(model_file)):
        raise AttributeError('model file does not exist: {}'.format(model_file))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [20]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [21]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [22]:
round(1/9, 6)

0.111111

In [23]:
import numpy as np
import sklearn.metrics
import torch


def macro_recall(pred_y, y, n_grapheme=168, n_vowel=11, n_consonant=7):
    pred_y = torch.split(pred_y, [n_grapheme, n_vowel, n_consonant], dim=1)
    pred_labels = [torch.argmax(py, dim=1).cpu().numpy() for py in pred_y]

    #y = y.cpu().numpy()
    # pred_y = [p.cpu().numpy() for p in pred_y]

    recall_grapheme = sklearn.metrics.recall_score(pred_labels[0], y_grapheme, average='macro')
    recall_vowel = sklearn.metrics.recall_score(pred_labels[1], y_vowel, average='macro')
    recall_consonant = sklearn.metrics.recall_score(pred_labels[2], y_consonant, average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_score = np.average(scores, weights=[2, 1, 1])
    # print(f'recall: grapheme {recall_grapheme}, vowel {recall_vowel}, consonant {recall_consonant}, '
    #       f'total {final_score}, y {y.shape}')
    return final_score

def calc_metrics(preds0, preds1, preds2, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    
    
    return metrics

In [24]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    
    return loss0 + loss1 + loss2 #, loss0.item(), loss1.item(), loss2.item()

In [25]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2 = 0., 0., 0.
    preds0, preds1,preds2 = [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs = model(x)
            outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    
    return metrics
            

In [26]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [27]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [28]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [29]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [30]:
np.random.random()

0.9381385494239725

In [31]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam

In [32]:
from cvcore.solver import WarmupCyclicalLR
def make_optimizer(model, base_lr=4e-4, weight_decay=0., weight_decay_bias=0., epsilon=1e-3):
    """
    Create optimizer with per-layer learning rate and weight decay.
    """
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = base_lr
        params += [{"params": [value], "lr": lr, "weight_decay": weight_decay_bias if 'bias' in key else weight_decay}]
    
    optimizer = torch.optim.AdamW(params, lr, eps=epsilon)
    return optimizer

In [33]:
def train(args, model):
    optimizer = make_optimizer(model)
    scheduler = WarmupCyclicalLR(
        "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)
    
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    
    for cycle in range(1, args.num_cycles+1):
        print('CYCLE:', cycle)
        train_cycle(args, model, optimizer, scheduler)
        scheduler = WarmupCyclicalLR(
            "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)

def train_cycle(args, model, optimizer, lr_scheduler):
    best_metrics = 0.
    best_key = 'recall'
    
    val_metrics = validate(model, val_loader)
    print(val_metrics)
    best_metrics = val_metrics[best_key]
    
    model.train()
    train_iter = 0

    for epoch in range(args.num_epochs):
        train_loss = 0

        bg = time.time()
        for batch_idx, (img, targets) in enumerate(train_loader):
            train_iter += 1
            img, targets  = img.cuda(), targets.cuda()
            batch_size = img.size(0)
            r = np.random.rand()

            if True:
                # generate mixed sample
                lam = np.random.beta(args.beta, args.beta)
                rand_index = torch.randperm(img.size()[0]).cuda()
                target_a = targets
                target_b = targets[rand_index]
                bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
                img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
                # adjust lambda to exactly match pixel ratio
                lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
                # compute output
                outputs = model(img)
                loss = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            
            optimizer.zero_grad()
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            #loss.backward()
            lr_scheduler(optimizer, batch_idx, epoch)
            optimizer.step()            
            
            current_lr = get_lrs(optimizer)

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
                loss.item(), train_loss/(batch_idx+1)), end='')

        if True:#train_iter > 0 and train_iter % args.iter_val == 0:
            val_metrics = validate(model, val_loader)
            print('\nval:', val_metrics)
                
            if val_metrics[best_key] > best_metrics:
                best_metrics = val_metrics[best_key]
                save_model(model, model_file)
                print('###>>>>> saved')
                
            model.train()

In [34]:
args = Namespace()
args.backbone = 'efficientnet-b4'
args.ckp_name = 'model3_efficientnet-b4_fold1_randaugment.pth'
args.predict =False

args.base_lr = 4e-4
args.num_epochs = 150
args.warmup_epochs = 10
args.num_cycles = 100
args.batch_size = 768
args.val_batch_size = 1024
args.st_epochs = 10

args.beta = 1.5
args.cutmix_prob = 0.5

In [35]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=1)

(200840, 5)
(200840, 32332)
(160635, 5) (40205, 5)


In [36]:
model, model_file = create_model(args)
#if torch.cuda.device_count() > 1:
#    model = nn.DataParallel(model)
model = model.cuda()


Loaded pretrained weights for efficientnet-b4
model file: ./models/efficientnet-b4/model3_efficientnet-b4_fold1_randaugment.pth, exist: False


In [37]:
train(args, model)

CYCLE: 1
{'recall': 0.061073, 'recall_grapheme': 0.004175, 'recall_vowel': 0.087298, 'recall_consonant': 0.148645, 'acc_grapheme': 0.005497, 'acc_vowel': 0.173337, 'acc_consonant': 0.127049, 'loss_grapheme': 5.142862, 'loss_vowel': 2.358716, 'loss_consonant': 1.958236}


  'recall', 'true', average, warn_for)


    0 | 0.000040 | 160512/160635 | 9.0782 | 9.3719 |
val: {'recall': 0.081958, 'recall_grapheme': 0.005502, 'recall_vowel': 0.123567, 'recall_consonant': 0.193261, 'acc_grapheme': 0.008855, 'acc_vowel': 0.159781, 'acc_consonant': 0.328317, 'loss_grapheme': 5.112659, 'loss_vowel': 2.339818, 'loss_consonant': 1.852748}
###>>>>> saved
    1 | 0.000080 | 160512/160635 | 8.1126 | 8.5154 |
val: {'recall': 0.082445, 'recall_grapheme': 0.003051, 'recall_vowel': 0.234511, 'recall_consonant': 0.089168, 'acc_grapheme': 0.036911, 'acc_vowel': 0.324636, 'acc_consonant': 0.624176, 'loss_grapheme': 4.724176, 'loss_vowel': 1.94818, 'loss_consonant': 1.196133}
###>>>>> saved
    2 | 0.000120 | 160512/160635 | 7.7359 | 7.8308 |
val: {'recall': 0.198782, 'recall_grapheme': 0.010327, 'recall_vowel': 0.437957, 'recall_consonant': 0.336515, 'acc_grapheme': 0.077453, 'acc_vowel': 0.660341, 'acc_consonant': 0.662256, 'loss_grapheme': 4.501469, 'loss_vowel': 1.289796, 'loss_consonant': 0.948831}
###>>>>> saved

   25 | 0.000371 | 160512/160635 | 1.1343 | 4.2199 |
val: {'recall': 0.967237, 'recall_grapheme': 0.950462, 'recall_vowel': 0.983726, 'recall_consonant': 0.984298, 'acc_grapheme': 0.949832, 'acc_vowel': 0.984455, 'acc_consonant': 0.983932, 'loss_grapheme': 0.539282, 'loss_vowel': 0.31078, 'loss_consonant': 0.206629}
   26 | 0.000369 | 160512/160635 | 4.1102 | 4.1880 |
val: {'recall': 0.969215, 'recall_grapheme': 0.953893, 'recall_vowel': 0.985138, 'recall_consonant': 0.983936, 'acc_grapheme': 0.951698, 'acc_vowel': 0.984927, 'acc_consonant': 0.984703, 'loss_grapheme': 0.511045, 'loss_vowel': 0.275869, 'loss_consonant': 0.189064}
###>>>>> saved
   27 | 0.000367 | 160512/160635 | 5.4644 | 4.3561 |
val: {'recall': 0.968969, 'recall_grapheme': 0.953176, 'recall_vowel': 0.984163, 'recall_consonant': 0.985362, 'acc_grapheme': 0.952643, 'acc_vowel': 0.984256, 'acc_consonant': 0.983186, 'loss_grapheme': 0.554379, 'loss_vowel': 0.304919, 'loss_consonant': 0.198365}
   28 | 0.000364 | 160512/160

###>>>>> saved
   51 | 0.000293 | 160512/160635 | 5.0057 | 3.7229 |
val: {'recall': 0.975462, 'recall_grapheme': 0.963635, 'recall_vowel': 0.988466, 'recall_consonant': 0.986113, 'acc_grapheme': 0.963214, 'acc_vowel': 0.988459, 'acc_consonant': 0.988136, 'loss_grapheme': 0.400704, 'loss_vowel': 0.240818, 'loss_consonant': 0.159125}
   52 | 0.000289 | 160512/160635 | 4.2268 | 3.8135 |
val: {'recall': 0.975484, 'recall_grapheme': 0.963644, 'recall_vowel': 0.98739, 'recall_consonant': 0.987256, 'acc_grapheme': 0.963313, 'acc_vowel': 0.987788, 'acc_consonant': 0.988061, 'loss_grapheme': 0.406244, 'loss_vowel': 0.261317, 'loss_consonant': 0.174894}
   53 | 0.000285 | 160512/160635 | 3.5911 | 3.8886 |
val: {'recall': 0.976018, 'recall_grapheme': 0.965553, 'recall_vowel': 0.987949, 'recall_consonant': 0.985018, 'acc_grapheme': 0.963711, 'acc_vowel': 0.988708, 'acc_consonant': 0.98826, 'loss_grapheme': 0.415905, 'loss_vowel': 0.255179, 'loss_consonant': 0.173964}
   54 | 0.000281 | 160512/1606

###>>>>> saved
   77 | 0.000187 | 160512/160635 | 1.4795 | 3.6708 |
val: {'recall': 0.980193, 'recall_grapheme': 0.969962, 'recall_vowel': 0.989296, 'recall_consonant': 0.991555, 'acc_grapheme': 0.968984, 'acc_vowel': 0.990001, 'acc_consonant': 0.990051, 'loss_grapheme': 0.390127, 'loss_vowel': 0.265139, 'loss_consonant': 0.18018}
###>>>>> saved
   78 | 0.000183 | 160512/160635 | 2.8721 | 3.8874 |
val: {'recall': 0.980134, 'recall_grapheme': 0.971135, 'recall_vowel': 0.988264, 'recall_consonant': 0.990003, 'acc_grapheme': 0.968685, 'acc_vowel': 0.989454, 'acc_consonant': 0.98933, 'loss_grapheme': 0.398032, 'loss_vowel': 0.273116, 'loss_consonant': 0.182404}
   79 | 0.000179 | 160512/160635 | 1.6144 | 3.6570 |
val: {'recall': 0.980405, 'recall_grapheme': 0.971718, 'recall_vowel': 0.988641, 'recall_consonant': 0.989542, 'acc_grapheme': 0.969631, 'acc_vowel': 0.989703, 'acc_consonant': 0.989852, 'loss_grapheme': 0.377308, 'loss_vowel': 0.250882, 'loss_consonant': 0.167135}
###>>>>> saved


  103 | 0.000086 | 160512/160635 | 2.8523 | 3.6201 |
val: {'recall': 0.981634, 'recall_grapheme': 0.973065, 'recall_vowel': 0.989673, 'recall_consonant': 0.990732, 'acc_grapheme': 0.971844, 'acc_vowel': 0.990698, 'acc_consonant': 0.990573, 'loss_grapheme': 0.372433, 'loss_vowel': 0.255555, 'loss_consonant': 0.174102}
  104 | 0.000082 | 160512/160635 | 3.7834 | 3.5074 |
val: {'recall': 0.982068, 'recall_grapheme': 0.973663, 'recall_vowel': 0.990113, 'recall_consonant': 0.990834, 'acc_grapheme': 0.971844, 'acc_vowel': 0.990797, 'acc_consonant': 0.990822, 'loss_grapheme': 0.355891, 'loss_vowel': 0.244377, 'loss_consonant': 0.166154}
###>>>>> saved
  105 | 0.000079 | 160512/160635 | 2.6721 | 3.6968 |
val: {'recall': 0.982141, 'recall_grapheme': 0.973786, 'recall_vowel': 0.990325, 'recall_consonant': 0.990667, 'acc_grapheme': 0.971745, 'acc_vowel': 0.990872, 'acc_consonant': 0.990598, 'loss_grapheme': 0.36518, 'loss_vowel': 0.251141, 'loss_consonant': 0.169694}
###>>>>> saved
  106 | 0.0000

  129 | 0.000017 | 160512/160635 | 3.5663 | 3.7424 |
val: {'recall': 0.982811, 'recall_grapheme': 0.974709, 'recall_vowel': 0.990576, 'recall_consonant': 0.991251, 'acc_grapheme': 0.973063, 'acc_vowel': 0.991145, 'acc_consonant': 0.991444, 'loss_grapheme': 0.379844, 'loss_vowel': 0.263531, 'loss_consonant': 0.179277}
###>>>>> saved
  130 | 0.000016 | 160512/160635 | 4.0012 | 3.7222 |
val: {'recall': 0.982696, 'recall_grapheme': 0.974535, 'recall_vowel': 0.9907, 'recall_consonant': 0.991012, 'acc_grapheme': 0.972839, 'acc_vowel': 0.991319, 'acc_consonant': 0.991344, 'loss_grapheme': 0.374718, 'loss_vowel': 0.261527, 'loss_consonant': 0.177817}
  131 | 0.000014 | 160512/160635 | 4.2659 | 3.5768 |
val: {'recall': 0.982558, 'recall_grapheme': 0.974399, 'recall_vowel': 0.990448, 'recall_consonant': 0.990986, 'acc_grapheme': 0.973063, 'acc_vowel': 0.99117, 'acc_consonant': 0.991319, 'loss_grapheme': 0.354992, 'loss_vowel': 0.247703, 'loss_consonant': 0.169273}
  132 | 0.000013 | 160512/16063

    4 | 0.000199 | 160512/160635 | 4.2866 | 3.5615 |
val: {'recall': 0.982602, 'recall_grapheme': 0.974245, 'recall_vowel': 0.990443, 'recall_consonant': 0.991477, 'acc_grapheme': 0.972143, 'acc_vowel': 0.991096, 'acc_consonant': 0.991245, 'loss_grapheme': 0.373254, 'loss_vowel': 0.255696, 'loss_consonant': 0.173489}
    5 | 0.000239 | 160512/160635 | 2.2307 | 3.5325 |
val: {'recall': 0.981962, 'recall_grapheme': 0.974145, 'recall_vowel': 0.989869, 'recall_consonant': 0.989691, 'acc_grapheme': 0.97259, 'acc_vowel': 0.991046, 'acc_consonant': 0.991145, 'loss_grapheme': 0.33888, 'loss_vowel': 0.235884, 'loss_consonant': 0.162801}
    6 | 0.000278 | 160512/160635 | 4.5518 | 3.6522 |
val: {'recall': 0.982194, 'recall_grapheme': 0.974562, 'recall_vowel': 0.989694, 'recall_consonant': 0.989956, 'acc_grapheme': 0.97259, 'acc_vowel': 0.990698, 'acc_consonant': 0.990573, 'loss_grapheme': 0.386066, 'loss_vowel': 0.264915, 'loss_consonant': 0.182362}
    7 | 0.000318 | 160512/160635 | 3.0897 | 3.

   30 | 0.000361 | 072960/160635 | 3.0609 | 3.6239 |
val: {'recall': 0.983607, 'recall_grapheme': 0.976195, 'recall_vowel': 0.990136, 'recall_consonant': 0.991902, 'acc_grapheme': 0.975152, 'acc_vowel': 0.991444, 'acc_consonant': 0.991518, 'loss_grapheme': 0.355737, 'loss_vowel': 0.262766, 'loss_consonant': 0.177989}
   31 | 0.000357 | 160512/160635 | 3.3371 | 3.6396 |
val: {'recall': 0.984323, 'recall_grapheme': 0.977224, 'recall_vowel': 0.991192, 'recall_consonant': 0.991653, 'acc_grapheme': 0.975575, 'acc_vowel': 0.991742, 'acc_consonant': 0.991916, 'loss_grapheme': 0.364106, 'loss_vowel': 0.251111, 'loss_consonant': 0.172879}
   32 | 0.000354 | 160512/160635 | 4.4661 | 3.4681 |
val: {'recall': 0.984107, 'recall_grapheme': 0.977871, 'recall_vowel': 0.990204, 'recall_consonant': 0.990481, 'acc_grapheme': 0.975376, 'acc_vowel': 0.991145, 'acc_consonant': 0.991419, 'loss_grapheme': 0.324085, 'loss_vowel': 0.22759, 'loss_consonant': 0.16003}
   33 | 0.000351 | 160512/160635 | 3.8224 | 3

   56 | 0.000274 | 160512/160635 | 1.9706 | 3.4794 |
val: {'recall': 0.986499, 'recall_grapheme': 0.980921, 'recall_vowel': 0.992193, 'recall_consonant': 0.991961, 'acc_grapheme': 0.978883, 'acc_vowel': 0.992513, 'acc_consonant': 0.992638, 'loss_grapheme': 0.340048, 'loss_vowel': 0.251965, 'loss_consonant': 0.168291}
###>>>>> saved
   57 | 0.000270 | 160512/160635 | 3.4429 | 3.4568 |
val: {'recall': 0.985924, 'recall_grapheme': 0.980309, 'recall_vowel': 0.990998, 'recall_consonant': 0.99208, 'acc_grapheme': 0.978436, 'acc_vowel': 0.991892, 'acc_consonant': 0.992464, 'loss_grapheme': 0.294088, 'loss_vowel': 0.215891, 'loss_consonant': 0.149896}
   58 | 0.000266 | 160512/160635 | 4.1137 | 3.3725 |
val: {'recall': 0.985482, 'recall_grapheme': 0.979865, 'recall_vowel': 0.99074, 'recall_consonant': 0.991456, 'acc_grapheme': 0.978361, 'acc_vowel': 0.991419, 'acc_consonant': 0.991295, 'loss_grapheme': 0.320197, 'loss_vowel': 0.225581, 'loss_consonant': 0.153715}
   59 | 0.000262 | 160512/1606

   82 | 0.000167 | 160512/160635 | 4.1373 | 3.3973 |
val: {'recall': 0.987853, 'recall_grapheme': 0.983068, 'recall_vowel': 0.992725, 'recall_consonant': 0.992551, 'acc_grapheme': 0.981122, 'acc_vowel': 0.992936, 'acc_consonant': 0.992986, 'loss_grapheme': 0.327791, 'loss_vowel': 0.247594, 'loss_consonant': 0.162827}
###>>>>> saved
   83 | 0.000163 | 160512/160635 | 3.6404 | 3.3588 |
val: {'recall': 0.987524, 'recall_grapheme': 0.982307, 'recall_vowel': 0.992643, 'recall_consonant': 0.992842, 'acc_grapheme': 0.980774, 'acc_vowel': 0.993235, 'acc_consonant': 0.993085, 'loss_grapheme': 0.29357, 'loss_vowel': 0.221547, 'loss_consonant': 0.150074}
   84 | 0.000158 | 160512/160635 | 3.0715 | 3.3186 |
val: {'recall': 0.987242, 'recall_grapheme': 0.982246, 'recall_vowel': 0.991801, 'recall_consonant': 0.992676, 'acc_grapheme': 0.980301, 'acc_vowel': 0.992663, 'acc_consonant': 0.992862, 'loss_grapheme': 0.300316, 'loss_vowel': 0.226131, 'loss_consonant': 0.154012}
   85 | 0.000154 | 160512/160

KeyboardInterrupt: 

In [None]:
#save_model(model, model_file)

In [46]:
del model