In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet
from cvcore.data.auto_augment import RandAugment
from PIL import Image
from utils import bn_update, moving_average, copy_model


In [2]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [3]:
#!ls /home/chec/data/bengali

In [4]:
DATA_DIR = '/home/chec/data/bengali'

In [5]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [7]:
HEIGHT = 137
WIDTH = 236

In [8]:
#import albumentations as albu
def get_train_augs():
    return RandAugment(n=2, m=27)

In [9]:
#plt.imshow(x)

In [10]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        orig_img = img.copy()
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            #img = augs(image=img)['image']
            img = np.asarray(augs(Image.fromarray(img)))
        
        img = np.expand_dims(img, axis=-1)
        orig_img = np.expand_dims(orig_img, axis=-1)
        
        #print('###', img.shape)
        #img = np.concatenate([img, img, img], 2)
        #print('>>>', img.shape)
        
        # taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
        #MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
        #STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]
        
        img = transforms.functional.to_tensor(img)
        orig_img = transforms.functional.to_tensor(orig_img)
        
        #img = transforms.functional.normalize(img, mean=MEAN, std=STD)
        
        if self.test_mode:
            return img
        elif self.train_mode:
            return img, orig_img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
                    
    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')

    train_df = shuffle(train_df, random_state=1234)

    grapheme_words = np.unique(train_df.grapheme.values)
    grapheme_words_dict = {grapheme: i for i, grapheme in enumerate(grapheme_words)}
    train_df['word_label'] = train_df['grapheme'].map(lambda x: grapheme_words_dict[x])

    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [11]:
#train_loader, val_loader = get_train_val_loaders()

In [12]:
#for x in train_loader:
#    print(x)
#    break

# model

In [13]:
#import pretrainedmodels

In [14]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [15]:
from argparse import Namespace
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d

MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

class BengaliNet(nn.Module):
    """
    EfficientNet B0-B8.
    Args:
        cfg (CfgNode): configs
    """
    def __init__(self, cfg):
        super(BengaliNet, self).__init__()
        model_name = cfg.MODEL_NAME
        pretrained = cfg.PRETRAINED
        input_channels = cfg.IN_CHANNELS
        pool_type = cfg.POOL_TYPE
        drop_connect_rate = cfg.DROP_CONNECT
        self.drop_rate = cfg.DROPOUT
        cls_head = cfg.CLS_HEAD
        num_total_classes = cfg.NUM_GRAPHEME_CLASSES + cfg.NUM_VOWEL_CLASSES + cfg.NUM_CONSONANT_CLASSES \
            + cfg.NUM_WORD_CLASSES

        backbone = timm.create_model(
            model_name=model_name,
            pretrained=pretrained,
            in_chans=input_channels,
            drop_connect_rate=drop_connect_rate,
        )
        self.conv_stem = backbone.conv_stem
        self.bn1 = backbone.bn1
        self.act1 = backbone.act1
        ### Original blocks ###
        for i in range(len((backbone.blocks))):
            setattr(self, "block{}".format(str(i)), backbone.blocks[i])
        self.conv_head = backbone.conv_head
        self.bn2 = backbone.bn2
        self.act2 = backbone.act2
        self.aux_block5 = backbone.blocks[5]
        self.aux_num_features = self.block5[-1].bn3.num_features
        self.aux_head4 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn4 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act4 = Swish()
        self.aux_head5 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn5 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act5 = Swish()
        self.global_pool = SelectAdaptivePool2d(pool_type=pool_type)
        self.num_features = backbone.num_features * self.global_pool.feat_mult()
        assert cls_head == 'linear'
        if cls_head == "linear":
            ### Baseline head ###
            self.fc = nn.Linear(self.num_features, num_total_classes)            
            self.aux_fc1 = nn.Linear(self.aux_num_features*4, num_total_classes)
            self.aux_fc2 = nn.Linear(self.aux_num_features*4, num_total_classes)
            
            for fc in [self.fc, self.aux_fc1, self.aux_fc2]:
                nn.init.zeros_(fc.bias.data)
        elif cls_head == "norm_softmax":
            ### NormSoftmax ###
            self.grapheme_fc = NormSoftmax(self.num_features, num_grapheme_classes)
            self.consonant_fc = NormSoftmax(self.num_features, num_consonant_classes)
            self.vowel_fc = NormSoftmax(self.num_features, num_vowel_classes)
        # Replace with Mish activation
        if cfg.MODEL_ACTIVATION == "mish":
            convert_swish_to_mish(self)
        del backbone

    def _features(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.block0(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x); b4 = x
        x = self.block5(x); b4 = self.aux_block5(b4); b5 = x
        x = self.block6(x)
        x = self.conv_head(x); b4 = self.aux_head4(b4); b5 = self.aux_head5(b5)
        x = self.bn2(x); b4 = self.bn4(b4); b5 = self.bn5(b5)
        x = self.act2(x); b4 = self.act4(b4); b5 = self.act5(b5)
        return b4, b5, x

    def forward(self, x):
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)

        # _, _, x = self._features(x)
        b4, b5, x = self._features(x)
        x = self.global_pool(x); b4 = self.global_pool(b4); b5 = self.global_pool(b5)
        x = torch.flatten(x, 1); b4 = torch.flatten(b4, 1); b5 = torch.flatten(b5, 1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        logits = self.fc(x)
        
        aux_logits1 = self.aux_fc1(b4)
        aux_logits2 = self.aux_fc2(b5)
        
        return logits, aux_logits1, aux_logits2

In [16]:
MODEL_DIR = './model4-ckps'
def create_model(cfg):
    model = BengaliNet(cfg)
    model_file = os.path.join(MODEL_DIR, cfg.MODEL_NAME, cfg.CKP_NAME)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [17]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [18]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [19]:
round(1/9, 6)

0.111111

In [20]:
import numpy as np
import sklearn.metrics
import torch


def calc_metrics(preds0, preds1, preds2, preds3, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2) == len(preds3)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    recall_word = sklearn.metrics.recall_score(preds3, y[:, 3], average='macro')
    
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    metrics['recall_word'] = round(recall_word, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    metrics['acc_word'] = round((preds3 == y[:, 3]).sum() / len(y), 6)    
    
    return metrics

In [21]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    loss3 = F.cross_entropy(outputs[3], y_true[:, 3], reduction='mean')
    
    return loss0 + loss1 + loss2 + loss3 #, loss0.item(), loss1.item(), loss2.item()

In [22]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2, loss3 = 0., 0., 0., 0.
    preds0, preds1, preds2, preds3 = [], [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs, _, _ = model(x)
            outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            preds3.append(torch.max(outputs[3], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            loss3 += F.cross_entropy(outputs[3], y[:, 3], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    preds3 = torch.cat(preds3, 0).cpu().numpy()
    
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, preds3, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    metrics['loss_word'] = round(loss3 / val_loader.num, 6)
    
    return metrics
            

In [23]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [24]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [25]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [26]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [27]:
def rand_bbox_new(size, lam):
    H = size[2]
    W = size[3]

    x_margin_rate = 0.2

    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * (1-x_margin_rate*2) * cut_rat)
    cut_h = np.int(H * cut_rat)
    
    min_x_center = np.int(W * x_margin_rate + cut_w / 2)
    max_x_center = np.int(W * (1-x_margin_rate) - cut_w / 2)
    #print(min_x_center, max_x_center, lam, cut_w)
    min_y_center = cut_h // 2
    max_y_center = H - cut_h // 2
    if max_y_center == min_y_center:
        max_y_center += 1

    # uniform
    cx = np.random.randint(min_x_center, max_x_center)
    cy = np.random.randint(min_y_center, max_y_center)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    
    #print(bbx1, bbx2, bby1, bby2)

    return bbx1, bby1, bbx2, bby2

In [28]:
np.random.random()

0.42107607895804633

In [29]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam
from gridmask import GridMask

In [30]:
from cvcore.solver import WarmupCyclicalLR
def make_optimizer(model, base_lr=4e-4, weight_decay=0., weight_decay_bias=0., epsilon=1e-3):
    """
    Create optimizer with per-layer learning rate and weight decay.
    """
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = base_lr
        params += [{"params": [value], "lr": lr, "weight_decay": weight_decay_bias if 'bias' in key else weight_decay}]
    
    optimizer = torch.optim.AdamW(params, lr, eps=epsilon)
    return optimizer

In [31]:
def train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid):
    train_loss = 0

    for batch_idx, (img, orig_img, targets) in enumerate(train_loader):
        img, orig_img, targets  = img.cuda(), orig_img.cuda(), targets.cuda()
        batch_size = img.size(0)
        r = np.random.rand()

        if r < 0.3:
            # generate mixed sample
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(img.size()[0]).cuda()
            target_a = targets
            target_b = targets[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
            #img[:, :, bby1:bby2, bbx1:bbx2] = img[rand_index, :, bby1:bby2, bbx1:bbx2] #for new cutmix
            img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
            
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
            # compute output
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            loss_aux1 = criterion(outputs_aux1, target_a) * lam + criterion(outputs_aux1, target_b) * (1. - lam)
            loss_aux2 = criterion(outputs_aux2, target_a) * lam + criterion(outputs_aux2, target_b) * (1. - lam)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        elif r > 0.7:
            img = grid(img)
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, targets)
            loss_aux1 = criterion(outputs_aux1, targets)
            loss_aux2 = criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        else:
            orig_img, targets = mixup(orig_img, targets)
            outputs, outputs_aux1, outputs_aux2 = model(orig_img)
            loss_primary = mixup_criterion(outputs, targets)
            loss_aux1 = mixup_criterion(outputs_aux1, targets)
            loss_aux2 = mixup_criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
            #loss = criterion(outputs, targets)

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        #loss.backward()
        lr_scheduler(optimizer, batch_idx, epoch)
        optimizer.step()            
        
        current_lr = get_lrs(optimizer)

        train_loss += loss.item()
        print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
            epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
            loss.item(), train_loss/(batch_idx+1)), end='')


In [32]:
best_metrics = 0.

def validate_and_save(model, model_file, val_loader, save=False):
    global best_metrics
    best_key = 'recall'
    val_metrics = validate(model, val_loader)
    print('\nval:', val_metrics)
    
    if val_metrics[best_key] > best_metrics:
        best_metrics = val_metrics[best_key]
        if save:
            save_model(model, model_file)
            print('###>>>>> saved', model_file)
    model.train()
    

def train(args):
    model, model_file = create_model(cfg)
    model = model.cuda()

    swa_model, _ = create_model(cfg)
    swa_model = swa_model.cuda()
    swa_model_file = model_file + '_swa'

    optimizer = make_optimizer(model)
    lr_scheduler = WarmupCyclicalLR(
        "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)
    
    [model, swa_model], optimizer = amp.initialize([model, swa_model], optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        swa_model = nn.DataParallel(swa_model)
    
    validate_and_save(model, model_file, val_loader, save=False)
    
    for cycle in range(1, args.num_cycles+1):
        print('CYCLE:', cycle)
        grid = GridMask(64, 128, rotate=15, ratio=0.6, mode=1, prob=1.)

        for epoch in range(args.num_epochs):
            grid.set_prob(epoch, args.st_epochs)
            train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid)
            validate_and_save(model, model_file, val_loader, save=True)
            
            if (epoch+1) == args.swa_start and cycle == 1:
                copy_model(swa_model, model)
                swa_n = 0
            if (epoch+1) >= args.swa_start and (epoch+1) % args.swa_freq == 0:
                print('SWA>>>:')
                moving_average(swa_model, model, 1.0 / (swa_n + 1))
                swa_n += 1
                bn_update(train_loader, swa_model)
                validate_and_save(swa_model, swa_model_file, val_loader, save=True)

        # reset scheduler at each cycle
        lr_scheduler = WarmupCyclicalLR(
            "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)

In [33]:
cfg = Namespace()
cfg.MODEL_NAME = 'tf_efficientnet_b4'
cfg.PRETRAINED = True
cfg.IN_CHANNELS = 1
cfg.POOL_TYPE = 'avg'
cfg.CLS_HEAD = 'linear'
cfg.MODEL_ACTIVATION = 'swish'
cfg.DROP_CONNECT = 0.2
cfg.DROPOUT= 0.
cfg.NUM_WORD_CLASSES = 1295
cfg.NUM_GRAPHEME_CLASSES = 168
cfg.NUM_VOWEL_CLASSES = 11
cfg.NUM_CONSONANT_CLASSES = 7
cfg.CKP_NAME = 'model4_eb4_fold1.pth'

In [34]:
#model, model_file = create_model(cfg)
#model(torch.randn(2,1,137,236))[0].size()

In [35]:
args = Namespace()

args.base_lr = 1e-4
args.num_epochs = 80
args.warmup_epochs = 5
args.num_cycles = 100
args.batch_size = 640
args.val_batch_size = 1024
args.st_epochs = 5

args.swa_start = 10
args.swa_freq = 2

args.beta = 1.0
args.cutmix_prob = 0.5

In [36]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=1)

(200840, 6)
(200840, 32332)
(160635, 6) (40205, 6)


In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold1.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold1.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold1.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold1.pth...

val: {'recall': 0.998193, 'recall_grapheme': 0.997701, 'recall_vowel': 0.998464, 'recall_consonant': 0.998909, 'recall_word': 0.996709, 'acc_grapheme': 0.997413, 'acc_vowel': 0.998508, 'acc_consonant': 0.999204, 'acc_word': 0.996667, 'loss_grapheme': 0.012457, 'loss_vowel': 0.007251, 'loss_consonant': 0.004512, 'loss_word': 0.01507}
CYCLE: 1
    0 | 0.000020 | 160000/160635 | 3.5332 | 5.2691 |||
val: {'recall': 0.997196, 'recall_grapheme': 0.996556, 'recall_vowel': 0.997956, 'recall_consonant': 0.997713, 'recall_word': 0.995322, 'acc_grapheme': 0.995996, 'acc_vowel': 0.998209, 'acc_consonant': 0.998881, 'acc_word': 0.995324, 'loss_grapheme': 0.032261, 'loss_vowel': 0.02544, 'loss_consonant': 0.019148, 

  0%|          | 0/250 [00:00<?, ?it/s]


val: {'recall': 0.997487, 'recall_grapheme': 0.996863, 'recall_vowel': 0.998404, 'recall_consonant': 0.997817, 'recall_word': 0.995737, 'acc_grapheme': 0.996393, 'acc_vowel': 0.998508, 'acc_consonant': 0.99908, 'acc_word': 0.995747, 'loss_grapheme': 0.021618, 'loss_vowel': 0.014295, 'loss_consonant': 0.011358, 'loss_word': 0.022621}
SWA>>>:


100%|██████████| 250/250 [01:59<00:00,  2.19it/s]



val: {'recall': 0.997891, 'recall_grapheme': 0.997168, 'recall_vowel': 0.998329, 'recall_consonant': 0.998897, 'recall_word': 0.996412, 'acc_grapheme': 0.997065, 'acc_vowel': 0.998483, 'acc_consonant': 0.999179, 'acc_word': 0.996418, 'loss_grapheme': 0.013369, 'loss_vowel': 0.007082, 'loss_consonant': 0.004778, 'loss_word': 0.015758}
   10 | 0.000095 | 160000/160635 | 4.3195 | 6.2358 ||
val: {'recall': 0.997255, 'recall_grapheme': 0.996305, 'recall_vowel': 0.998316, 'recall_consonant': 0.998092, 'recall_word': 0.995559, 'acc_grapheme': 0.995921, 'acc_vowel': 0.998284, 'acc_consonant': 0.99898, 'acc_word': 0.995548, 'loss_grapheme': 0.027109, 'loss_vowel': 0.020022, 'loss_consonant': 0.015796, 'loss_word': 0.027285}
   11 | 0.000095 | 160000/160635 | 0.4096 | 5.1606 ||

  0%|          | 0/250 [00:00<?, ?it/s]


val: {'recall': 0.997474, 'recall_grapheme': 0.997032, 'recall_vowel': 0.99809, 'recall_consonant': 0.997741, 'recall_word': 0.996109, 'acc_grapheme': 0.996667, 'acc_vowel': 0.998433, 'acc_consonant': 0.999129, 'acc_word': 0.99607, 'loss_grapheme': 0.018316, 'loss_vowel': 0.010662, 'loss_consonant': 0.008136, 'loss_word': 0.019797}
SWA>>>:


100%|██████████| 250/250 [01:59<00:00,  2.15it/s]



val: {'recall': 0.998001, 'recall_grapheme': 0.99733, 'recall_vowel': 0.998504, 'recall_consonant': 0.998839, 'recall_word': 0.996619, 'acc_grapheme': 0.997289, 'acc_vowel': 0.998582, 'acc_consonant': 0.999154, 'acc_word': 0.996568, 'loss_grapheme': 0.012475, 'loss_vowel': 0.006634, 'loss_consonant': 0.004535, 'loss_word': 0.01493}
   12 | 0.000094 | 160000/160635 | 4.6426 | 5.3854 ||
val: {'recall': 0.997566, 'recall_grapheme': 0.997093, 'recall_vowel': 0.998349, 'recall_consonant': 0.99773, 'recall_word': 0.996151, 'acc_grapheme': 0.996692, 'acc_vowel': 0.998458, 'acc_consonant': 0.99908, 'acc_word': 0.996145, 'loss_grapheme': 0.019946, 'loss_vowel': 0.013112, 'loss_consonant': 0.009802, 'loss_word': 0.021531}
   13 | 0.000093 | 160000/160635 | 6.9116 | 5.5072 |||

  0%|          | 0/250 [00:00<?, ?it/s]


val: {'recall': 0.996988, 'recall_grapheme': 0.996158, 'recall_vowel': 0.997463, 'recall_consonant': 0.998174, 'recall_word': 0.995149, 'acc_grapheme': 0.995797, 'acc_vowel': 0.997886, 'acc_consonant': 0.998806, 'acc_word': 0.99515, 'loss_grapheme': 0.032276, 'loss_vowel': 0.025127, 'loss_consonant': 0.019391, 'loss_word': 0.032221}
SWA>>>:


100%|██████████| 250/250 [01:58<00:00,  2.17it/s]



val: {'recall': 0.998074, 'recall_grapheme': 0.997475, 'recall_vowel': 0.998482, 'recall_consonant': 0.998866, 'recall_word': 0.996555, 'acc_grapheme': 0.997289, 'acc_vowel': 0.998582, 'acc_consonant': 0.999154, 'acc_word': 0.996518, 'loss_grapheme': 0.012376, 'loss_vowel': 0.006871, 'loss_consonant': 0.004634, 'loss_word': 0.015034}


KeyboardInterrupt: 

In [37]:
train(args, model)

CYCLE: 1
{'recall': 0.99719, 'recall_grapheme': 0.996382, 'recall_vowel': 0.997862, 'recall_consonant': 0.998134, 'recall_word': 0.99567, 'acc_grapheme': 0.995871, 'acc_vowel': 0.998085, 'acc_consonant': 0.99898, 'acc_word': 0.995672, 'loss_grapheme': 0.022536, 'loss_vowel': 0.014581, 'loss_consonant': 0.010935, 'loss_word': 0.021575}
    0 | 0.000060 | 160000/160635 | 4.1378 | 5.8142 ||
val: {'recall': 0.996434, 'recall_grapheme': 0.995432, 'recall_vowel': 0.997529, 'recall_consonant': 0.997342, 'recall_word': 0.995104, 'acc_grapheme': 0.995001, 'acc_vowel': 0.997513, 'acc_consonant': 0.998483, 'acc_word': 0.99515, 'loss_grapheme': 0.065838, 'loss_vowel': 0.050984, 'loss_consonant': 0.03582, 'loss_word': 0.04643}
    1 | 0.000120 | 160000/160635 | 0.3627 | 5.8495 ||
val: {'recall': 0.996762, 'recall_grapheme': 0.995615, 'recall_vowel': 0.99794, 'recall_consonant': 0.997876, 'recall_word': 0.995165, 'acc_grapheme': 0.995075, 'acc_vowel': 0.997662, 'acc_consonant': 0.998632, 'acc_word':

   21 | 0.000247 | 160000/160635 | 12.1958 | 6.2732 ||
val: {'recall': 0.996551, 'recall_grapheme': 0.995729, 'recall_vowel': 0.997595, 'recall_consonant': 0.997152, 'recall_word': 0.994702, 'acc_grapheme': 0.994876, 'acc_vowel': 0.997562, 'acc_consonant': 0.998433, 'acc_word': 0.994702, 'loss_grapheme': 0.059388, 'loss_vowel': 0.043675, 'loss_consonant': 0.03106, 'loss_word': 0.04741}
   22 | 0.000243 | 160000/160635 | 0.6093 | 6.0554 ||
val: {'recall': 0.996553, 'recall_grapheme': 0.995726, 'recall_vowel': 0.998072, 'recall_consonant': 0.996688, 'recall_word': 0.994773, 'acc_grapheme': 0.9951, 'acc_vowel': 0.997886, 'acc_consonant': 0.998582, 'acc_word': 0.994727, 'loss_grapheme': 0.052127, 'loss_vowel': 0.039893, 'loss_consonant': 0.030727, 'loss_word': 0.039822}
   23 | 0.000238 | 160000/160635 | 0.5189 | 5.5368 ||
val: {'recall': 0.996704, 'recall_grapheme': 0.995564, 'recall_vowel': 0.99786, 'recall_consonant': 0.997829, 'recall_word': 0.995001, 'acc_grapheme': 0.995274, 'acc_vow

   42 | 0.000132 | 160000/160635 | 0.5902 | 5.4162 |||
val: {'recall': 0.996919, 'recall_grapheme': 0.996034, 'recall_vowel': 0.997751, 'recall_consonant': 0.997856, 'recall_word': 0.99521, 'acc_grapheme': 0.995622, 'acc_vowel': 0.99801, 'acc_consonant': 0.998756, 'acc_word': 0.9952, 'loss_grapheme': 0.04251, 'loss_vowel': 0.03518, 'loss_consonant': 0.025374, 'loss_word': 0.035236}
   43 | 0.000127 | 160000/160635 | 9.3936 | 6.0009 ||
val: {'recall': 0.99703, 'recall_grapheme': 0.996459, 'recall_vowel': 0.99764, 'recall_consonant': 0.997563, 'recall_word': 0.995395, 'acc_grapheme': 0.995672, 'acc_vowel': 0.997712, 'acc_consonant': 0.998881, 'acc_word': 0.995324, 'loss_grapheme': 0.058771, 'loss_vowel': 0.04855, 'loss_consonant': 0.03469, 'loss_word': 0.046771}
   44 | 0.000121 | 160000/160635 | 0.4705 | 5.8303 ||
val: {'recall': 0.997126, 'recall_grapheme': 0.996633, 'recall_vowel': 0.997846, 'recall_consonant': 0.997394, 'recall_word': 0.995704, 'acc_grapheme': 0.99607, 'acc_vowel': 0

   63 | 0.000029 | 160000/160635 | 13.4482 | 5.8638 ||
val: {'recall': 0.996825, 'recall_grapheme': 0.996039, 'recall_vowel': 0.997694, 'recall_consonant': 0.997528, 'recall_word': 0.995306, 'acc_grapheme': 0.995598, 'acc_vowel': 0.99796, 'acc_consonant': 0.998856, 'acc_word': 0.995274, 'loss_grapheme': 0.04512, 'loss_vowel': 0.038703, 'loss_consonant': 0.028448, 'loss_word': 0.036169}
   64 | 0.000025 | 160000/160635 | 7.0196 | 6.2755 ||
val: {'recall': 0.996891, 'recall_grapheme': 0.996094, 'recall_vowel': 0.997755, 'recall_consonant': 0.99762, 'recall_word': 0.995298, 'acc_grapheme': 0.995722, 'acc_vowel': 0.997911, 'acc_consonant': 0.99893, 'acc_word': 0.995324, 'loss_grapheme': 0.053122, 'loss_vowel': 0.046575, 'loss_consonant': 0.033558, 'loss_word': 0.039225}
   65 | 0.000022 | 160000/160635 | 14.3695 | 5.9077 |
val: {'recall': 0.996719, 'recall_grapheme': 0.996104, 'recall_vowel': 0.997337, 'recall_consonant': 0.997331, 'recall_word': 0.994826, 'acc_grapheme': 0.995374, 'acc_vo

    4 | 0.000297 | 160000/160635 | 0.4369 | 5.6495 ||
val: {'recall': 0.99713, 'recall_grapheme': 0.996358, 'recall_vowel': 0.998175, 'recall_consonant': 0.99763, 'recall_word': 0.995337, 'acc_grapheme': 0.995996, 'acc_vowel': 0.99801, 'acc_consonant': 0.998607, 'acc_word': 0.995299, 'loss_grapheme': 0.021639, 'loss_vowel': 0.014439, 'loss_consonant': 0.010829, 'loss_word': 0.02167}
    5 | 0.000296 | 160000/160635 | 12.6550 | 6.2098 |
val: {'recall': 0.996045, 'recall_grapheme': 0.995383, 'recall_vowel': 0.997299, 'recall_consonant': 0.996116, 'recall_word': 0.99476, 'acc_grapheme': 0.995001, 'acc_vowel': 0.997687, 'acc_consonant': 0.998632, 'acc_word': 0.994752, 'loss_grapheme': 0.041029, 'loss_vowel': 0.031883, 'loss_consonant': 0.023205, 'loss_word': 0.032587}
    6 | 0.000294 | 160000/160635 | 8.3512 | 6.3980 |||
val: {'recall': 0.996431, 'recall_grapheme': 0.995546, 'recall_vowel': 0.997308, 'recall_consonant': 0.997324, 'recall_word': 0.99438, 'acc_grapheme': 0.994926, 'acc_vowe

   25 | 0.000228 | 160000/160635 | 11.9801 | 6.0955 |
val: {'recall': 0.99624, 'recall_grapheme': 0.995238, 'recall_vowel': 0.99756, 'recall_consonant': 0.996924, 'recall_word': 0.994447, 'acc_grapheme': 0.995001, 'acc_vowel': 0.997761, 'acc_consonant': 0.998458, 'acc_word': 0.994429, 'loss_grapheme': 0.039731, 'loss_vowel': 0.031945, 'loss_consonant': 0.026598, 'loss_word': 0.032693}
   26 | 0.000223 | 160000/160635 | 5.4470 | 5.7559 ||
val: {'recall': 0.996501, 'recall_grapheme': 0.995579, 'recall_vowel': 0.997807, 'recall_consonant': 0.997036, 'recall_word': 0.994917, 'acc_grapheme': 0.995349, 'acc_vowel': 0.99801, 'acc_consonant': 0.998458, 'acc_word': 0.994827, 'loss_grapheme': 0.051517, 'loss_vowel': 0.041801, 'loss_consonant': 0.032477, 'loss_word': 0.045697}
   27 | 0.000218 | 160000/160635 | 5.6375 | 5.4967 ||
val: {'recall': 0.996877, 'recall_grapheme': 0.996121, 'recall_vowel': 0.997963, 'recall_consonant': 0.997303, 'recall_word': 0.995049, 'acc_grapheme': 0.995274, 'acc_vo

   46 | 0.000109 | 160000/160635 | 9.8065 | 5.4907 ||
val: {'recall': 0.997194, 'recall_grapheme': 0.996399, 'recall_vowel': 0.997983, 'recall_consonant': 0.997995, 'recall_word': 0.995271, 'acc_grapheme': 0.995747, 'acc_vowel': 0.99806, 'acc_consonant': 0.998707, 'acc_word': 0.995274, 'loss_grapheme': 0.044276, 'loss_vowel': 0.037074, 'loss_consonant': 0.027793, 'loss_word': 0.033986}
   47 | 0.000104 | 160000/160635 | 0.6937 | 5.9416 ||
val: {'recall': 0.997561, 'recall_grapheme': 0.996697, 'recall_vowel': 0.998234, 'recall_consonant': 0.998614, 'recall_word': 0.995896, 'acc_grapheme': 0.996319, 'acc_vowel': 0.998309, 'acc_consonant': 0.99898, 'acc_word': 0.995896, 'loss_grapheme': 0.02396, 'loss_vowel': 0.017194, 'loss_consonant': 0.013379, 'loss_word': 0.023348}
   48 | 0.000098 | 160000/160635 | 10.4296 | 5.7264 |
val: {'recall': 0.997061, 'recall_grapheme': 0.996126, 'recall_vowel': 0.997913, 'recall_consonant': 0.998079, 'recall_word': 0.995004, 'acc_grapheme': 0.995399, 'acc_vo

   67 | 0.000016 | 160000/160635 | 10.4374 | 5.9359 |
val: {'recall': 0.99747, 'recall_grapheme': 0.996933, 'recall_vowel': 0.998257, 'recall_consonant': 0.997755, 'recall_word': 0.99564, 'acc_grapheme': 0.99617, 'acc_vowel': 0.998284, 'acc_consonant': 0.999055, 'acc_word': 0.995647, 'loss_grapheme': 0.032994, 'loss_vowel': 0.026946, 'loss_consonant': 0.021132, 'loss_word': 0.029545}
   68 | 0.000014 | 160000/160635 | 12.8787 | 5.1190 |
val: {'recall': 0.997087, 'recall_grapheme': 0.996493, 'recall_vowel': 0.997766, 'recall_consonant': 0.997596, 'recall_word': 0.995087, 'acc_grapheme': 0.995598, 'acc_vowel': 0.99801, 'acc_consonant': 0.998856, 'acc_word': 0.995075, 'loss_grapheme': 0.063344, 'loss_vowel': 0.056675, 'loss_consonant': 0.040807, 'loss_word': 0.046101}
   69 | 0.000011 | 160000/160635 | 0.4144 | 6.2776 |||
val: {'recall': 0.997692, 'recall_grapheme': 0.997121, 'recall_vowel': 0.998292, 'recall_consonant': 0.998235, 'recall_word': 0.995898, 'acc_grapheme': 0.996393, 'acc_vo

    8 | 0.000291 | 160000/160635 | 16.9044 | 5.7784 |
val: {'recall': 0.996995, 'recall_grapheme': 0.996165, 'recall_vowel': 0.997812, 'recall_consonant': 0.997836, 'recall_word': 0.995302, 'acc_grapheme': 0.995946, 'acc_vowel': 0.99806, 'acc_consonant': 0.998881, 'acc_word': 0.995299, 'loss_grapheme': 0.028182, 'loss_vowel': 0.020619, 'loss_consonant': 0.015741, 'loss_word': 0.026441}
    9 | 0.000289 | 160000/160635 | 0.4810 | 5.7326 |||
val: {'recall': 0.997221, 'recall_grapheme': 0.996417, 'recall_vowel': 0.998064, 'recall_consonant': 0.997987, 'recall_word': 0.995315, 'acc_grapheme': 0.995797, 'acc_vowel': 0.99801, 'acc_consonant': 0.998906, 'acc_word': 0.995324, 'loss_grapheme': 0.023892, 'loss_vowel': 0.016276, 'loss_consonant': 0.01127, 'loss_word': 0.02406}
   10 | 0.000286 | 160000/160635 | 7.0158 | 5.9027 |||
val: {'recall': 0.996588, 'recall_grapheme': 0.995294, 'recall_vowel': 0.997828, 'recall_consonant': 0.997935, 'recall_word': 0.994137, 'acc_grapheme': 0.994926, 'acc_v

   29 | 0.000207 | 160000/160635 | 13.4666 | 5.8615 |
val: {'recall': 0.996277, 'recall_grapheme': 0.995514, 'recall_vowel': 0.997339, 'recall_consonant': 0.996742, 'recall_word': 0.994625, 'acc_grapheme': 0.994876, 'acc_vowel': 0.997488, 'acc_consonant': 0.998632, 'acc_word': 0.994553, 'loss_grapheme': 0.062222, 'loss_vowel': 0.057674, 'loss_consonant': 0.0427, 'loss_word': 0.048799}
   30 | 0.000202 | 160000/160635 | 12.9628 | 5.6233 |
val: {'recall': 0.997201, 'recall_grapheme': 0.996381, 'recall_vowel': 0.998176, 'recall_consonant': 0.997866, 'recall_word': 0.995365, 'acc_grapheme': 0.995672, 'acc_vowel': 0.99811, 'acc_consonant': 0.99898, 'acc_word': 0.995299, 'loss_grapheme': 0.031323, 'loss_vowel': 0.025269, 'loss_consonant': 0.018557, 'loss_word': 0.028789}
   31 | 0.000196 | 160000/160635 | 0.2912 | 5.7281 |||
val: {'recall': 0.997484, 'recall_grapheme': 0.996745, 'recall_vowel': 0.997983, 'recall_consonant': 0.998464, 'recall_word': 0.995805, 'acc_grapheme': 0.996369, 'acc_vo

   50 | 0.000087 | 160000/160635 | 0.4565 | 5.7895 ||
val: {'recall': 0.997172, 'recall_grapheme': 0.996568, 'recall_vowel': 0.99805, 'recall_consonant': 0.997501, 'recall_word': 0.995769, 'acc_grapheme': 0.996344, 'acc_vowel': 0.998408, 'acc_consonant': 0.99893, 'acc_word': 0.995772, 'loss_grapheme': 0.030396, 'loss_vowel': 0.024626, 'loss_consonant': 0.018384, 'loss_word': 0.026732}
   51 | 0.000082 | 160000/160635 | 15.2841 | 5.5126 |
val: {'recall': 0.996983, 'recall_grapheme': 0.996295, 'recall_vowel': 0.99788, 'recall_consonant': 0.997463, 'recall_word': 0.99554, 'acc_grapheme': 0.995896, 'acc_vowel': 0.998184, 'acc_consonant': 0.99893, 'acc_word': 0.995548, 'loss_grapheme': 0.029333, 'loss_vowel': 0.023453, 'loss_consonant': 0.018837, 'loss_word': 0.026721}
   52 | 0.000077 | 160000/160635 | 0.4116 | 5.5332 ||
val: {'recall': 0.997146, 'recall_grapheme': 0.996466, 'recall_vowel': 0.998143, 'recall_consonant': 0.997509, 'recall_word': 0.995642, 'acc_grapheme': 0.995921, 'acc_vowe

   71 | 0.000007 | 160000/160635 | 3.8256 | 5.1787 ||
val: {'recall': 0.997136, 'recall_grapheme': 0.996515, 'recall_vowel': 0.997964, 'recall_consonant': 0.99755, 'recall_word': 0.995205, 'acc_grapheme': 0.995797, 'acc_vowel': 0.997886, 'acc_consonant': 0.99893, 'acc_word': 0.995125, 'loss_grapheme': 0.039368, 'loss_vowel': 0.03063, 'loss_consonant': 0.022482, 'loss_word': 0.035292}
   72 | 0.000006 | 160000/160635 | 13.0108 | 5.6229 ||
val: {'recall': 0.997141, 'recall_grapheme': 0.996498, 'recall_vowel': 0.998005, 'recall_consonant': 0.997565, 'recall_word': 0.995204, 'acc_grapheme': 0.995846, 'acc_vowel': 0.998184, 'acc_consonant': 0.99898, 'acc_word': 0.9952, 'loss_grapheme': 0.056504, 'loss_vowel': 0.044744, 'loss_consonant': 0.032248, 'loss_word': 0.04706}
   73 | 0.000004 | 160000/160635 | 12.0829 | 5.5248 |
val: {'recall': 0.997391, 'recall_grapheme': 0.996965, 'recall_vowel': 0.998167, 'recall_consonant': 0.997467, 'recall_word': 0.995994, 'acc_grapheme': 0.996617, 'acc_vowel

   12 | 0.000281 | 160000/160635 | 0.6086 | 5.4475 ||
val: {'recall': 0.996957, 'recall_grapheme': 0.996197, 'recall_vowel': 0.99735, 'recall_consonant': 0.998085, 'recall_word': 0.995213, 'acc_grapheme': 0.995772, 'acc_vowel': 0.998135, 'acc_consonant': 0.998856, 'acc_word': 0.995224, 'loss_grapheme': 0.027646, 'loss_vowel': 0.019468, 'loss_consonant': 0.015116, 'loss_word': 0.026632}
   13 | 0.000278 | 160000/160635 | 15.2891 | 6.0466 ||
val: {'recall': 0.99703, 'recall_grapheme': 0.996172, 'recall_vowel': 0.997988, 'recall_consonant': 0.997789, 'recall_word': 0.995326, 'acc_grapheme': 0.995871, 'acc_vowel': 0.998085, 'acc_consonant': 0.998707, 'acc_word': 0.995299, 'loss_grapheme': 0.023654, 'loss_vowel': 0.01606, 'loss_consonant': 0.012201, 'loss_word': 0.02388}
   14 | 0.000275 | 160000/160635 | 0.2953 | 5.3545 ||
val: {'recall': 0.996974, 'recall_grapheme': 0.996006, 'recall_vowel': 0.997939, 'recall_consonant': 0.997947, 'recall_word': 0.995408, 'acc_grapheme': 0.995697, 'acc_vo

   33 | 0.000185 | 160000/160635 | 0.5642 | 5.5665 ||
val: {'recall': 0.996989, 'recall_grapheme': 0.996197, 'recall_vowel': 0.997905, 'recall_consonant': 0.997658, 'recall_word': 0.99546, 'acc_grapheme': 0.995722, 'acc_vowel': 0.998284, 'acc_consonant': 0.99898, 'acc_word': 0.995423, 'loss_grapheme': 0.027371, 'loss_vowel': 0.019772, 'loss_consonant': 0.016444, 'loss_word': 0.024955}
   34 | 0.000179 | 160000/160635 | 0.4136 | 5.7351 ||
val: {'recall': 0.997446, 'recall_grapheme': 0.996802, 'recall_vowel': 0.997651, 'recall_consonant': 0.998528, 'recall_word': 0.996025, 'acc_grapheme': 0.996418, 'acc_vowel': 0.998209, 'acc_consonant': 0.99893, 'acc_word': 0.995946, 'loss_grapheme': 0.020784, 'loss_vowel': 0.014078, 'loss_consonant': 0.010538, 'loss_word': 0.01982}
   35 | 0.000173 | 160000/160635 | 6.4616 | 5.7280 |||
val: {'recall': 0.99743, 'recall_grapheme': 0.996651, 'recall_vowel': 0.998174, 'recall_consonant': 0.998243, 'recall_word': 0.99594, 'acc_grapheme': 0.996294, 'acc_vowe

   54 | 0.000067 | 160000/160635 | 12.4477 | 5.5955 ||
val: {'recall': 0.997802, 'recall_grapheme': 0.997165, 'recall_vowel': 0.998296, 'recall_consonant': 0.99858, 'recall_word': 0.996048, 'acc_grapheme': 0.996642, 'acc_vowel': 0.998383, 'acc_consonant': 0.999005, 'acc_word': 0.996045, 'loss_grapheme': 0.028503, 'loss_vowel': 0.021739, 'loss_consonant': 0.016533, 'loss_word': 0.026119}
###>>>>> saved
   55 | 0.000062 | 160000/160635 | 6.3102 | 5.7337 |||
val: {'recall': 0.997353, 'recall_grapheme': 0.996639, 'recall_vowel': 0.998037, 'recall_consonant': 0.998095, 'recall_word': 0.995913, 'acc_grapheme': 0.996294, 'acc_vowel': 0.998259, 'acc_consonant': 0.99898, 'acc_word': 0.995896, 'loss_grapheme': 0.021151, 'loss_vowel': 0.013977, 'loss_consonant': 0.010984, 'loss_word': 0.021681}
   56 | 0.000057 | 160000/160635 | 13.6307 | 5.3485 ||
val: {'recall': 0.997341, 'recall_grapheme': 0.996861, 'recall_vowel': 0.998059, 'recall_consonant': 0.997583, 'recall_word': 0.995731, 'acc_grapheme'

KeyboardInterrupt: 

In [39]:
train(args, model)

CYCLE: 1
{'recall': 0.996308, 'recall_grapheme': 0.995273, 'recall_vowel': 0.997201, 'recall_consonant': 0.997485, 'recall_word': 0.993975, 'acc_grapheme': 0.994603, 'acc_vowel': 0.997289, 'acc_consonant': 0.998159, 'acc_word': 0.993956, 'loss_grapheme': 0.041434, 'loss_vowel': 0.029393, 'loss_consonant': 0.021052, 'loss_word': 0.035974}
    0 | 0.000060 | 160000/160635 | 0.2920 | 5.5432 ||
val: {'recall': 0.995986, 'recall_grapheme': 0.995068, 'recall_vowel': 0.997316, 'recall_consonant': 0.996493, 'recall_word': 0.993781, 'acc_grapheme': 0.99423, 'acc_vowel': 0.997189, 'acc_consonant': 0.998159, 'acc_word': 0.993832, 'loss_grapheme': 0.075465, 'loss_vowel': 0.059914, 'loss_consonant': 0.04144, 'loss_word': 0.053402}
    1 | 0.000120 | 160000/160635 | 8.0131 | 5.9631 ||
val: {'recall': 0.994908, 'recall_grapheme': 0.993831, 'recall_vowel': 0.996551, 'recall_consonant': 0.995419, 'recall_word': 0.993102, 'acc_grapheme': 0.992762, 'acc_vowel': 0.996393, 'acc_consonant': 0.997463, 'acc_w

   21 | 0.000247 | 160000/160635 | 11.2543 | 5.3236 |
val: {'recall': 0.995887, 'recall_grapheme': 0.994196, 'recall_vowel': 0.997398, 'recall_consonant': 0.997757, 'recall_word': 0.993472, 'acc_grapheme': 0.993483, 'acc_vowel': 0.99714, 'acc_consonant': 0.998209, 'acc_word': 0.993558, 'loss_grapheme': 0.120974, 'loss_vowel': 0.09259, 'loss_consonant': 0.062598, 'loss_word': 0.075114}
   22 | 0.000243 | 160000/160635 | 5.3731 | 5.5523 |||
val: {'recall': 0.996105, 'recall_grapheme': 0.995217, 'recall_vowel': 0.996771, 'recall_consonant': 0.997215, 'recall_word': 0.994017, 'acc_grapheme': 0.99418, 'acc_vowel': 0.99714, 'acc_consonant': 0.998234, 'acc_word': 0.994055, 'loss_grapheme': 0.091323, 'loss_vowel': 0.075162, 'loss_consonant': 0.053502, 'loss_word': 0.061462}
   23 | 0.000238 | 160000/160635 | 3.4371 | 5.6049 ||
val: {'recall': 0.99578, 'recall_grapheme': 0.994146, 'recall_vowel': 0.997398, 'recall_consonant': 0.997431, 'recall_word': 0.993795, 'acc_grapheme': 0.993508, 'acc_vow

   43 | 0.000127 | 160000/160635 | 8.1133 | 5.8494 ||
val: {'recall': 0.996222, 'recall_grapheme': 0.994756, 'recall_vowel': 0.997538, 'recall_consonant': 0.997839, 'recall_word': 0.994193, 'acc_grapheme': 0.994205, 'acc_vowel': 0.997314, 'acc_consonant': 0.998408, 'acc_word': 0.994155, 'loss_grapheme': 0.101491, 'loss_vowel': 0.085259, 'loss_consonant': 0.059546, 'loss_word': 0.066926}
   44 | 0.000121 | 160000/160635 | 11.0252 | 5.4188 |
val: {'recall': 0.996076, 'recall_grapheme': 0.99475, 'recall_vowel': 0.99736, 'recall_consonant': 0.997446, 'recall_word': 0.994224, 'acc_grapheme': 0.99408, 'acc_vowel': 0.997413, 'acc_consonant': 0.998433, 'acc_word': 0.99423, 'loss_grapheme': 0.125227, 'loss_vowel': 0.112274, 'loss_consonant': 0.078334, 'loss_word': 0.074809}
   45 | 0.000115 | 160000/160635 | 5.5866 | 4.9891 ||
val: {'recall': 0.996647, 'recall_grapheme': 0.995427, 'recall_vowel': 0.997382, 'recall_consonant': 0.99835, 'recall_word': 0.994493, 'acc_grapheme': 0.994752, 'acc_vowe

   64 | 0.000025 | 160000/160635 | 4.3875 | 5.0573 ||
val: {'recall': 0.996591, 'recall_grapheme': 0.995529, 'recall_vowel': 0.997283, 'recall_consonant': 0.998023, 'recall_word': 0.994801, 'acc_grapheme': 0.994951, 'acc_vowel': 0.997513, 'acc_consonant': 0.998756, 'acc_word': 0.994851, 'loss_grapheme': 0.059175, 'loss_vowel': 0.048154, 'loss_consonant': 0.035569, 'loss_word': 0.044043}
   65 | 0.000022 | 160000/160635 | 10.3529 | 5.1616 |
val: {'recall': 0.996847, 'recall_grapheme': 0.995767, 'recall_vowel': 0.997461, 'recall_consonant': 0.998393, 'recall_word': 0.99491, 'acc_grapheme': 0.995175, 'acc_vowel': 0.997687, 'acc_consonant': 0.998707, 'acc_word': 0.995001, 'loss_grapheme': 0.049876, 'loss_vowel': 0.042581, 'loss_consonant': 0.031549, 'loss_word': 0.038443}
   66 | 0.000019 | 160000/160635 | 0.4634 | 5.1400 ||
val: {'recall': 0.99694, 'recall_grapheme': 0.995895, 'recall_vowel': 0.997479, 'recall_consonant': 0.998492, 'recall_word': 0.994923, 'acc_grapheme': 0.995175, 'acc_v

KeyboardInterrupt: 

In [39]:
train(args, model)

CYCLE: 1
{'recall': 0.996308, 'recall_grapheme': 0.995273, 'recall_vowel': 0.997201, 'recall_consonant': 0.997485, 'recall_word': 0.993975, 'acc_grapheme': 0.994603, 'acc_vowel': 0.997289, 'acc_consonant': 0.998159, 'acc_word': 0.993956, 'loss_grapheme': 0.041434, 'loss_vowel': 0.029393, 'loss_consonant': 0.021052, 'loss_word': 0.035974}
    0 | 0.000060 | 160000/160635 | 14.4489 | 10.7515 |
val: {'recall': 0.994211, 'recall_grapheme': 0.992448, 'recall_vowel': 0.995722, 'recall_consonant': 0.996226, 'recall_word': 0.993265, 'acc_grapheme': 0.991369, 'acc_vowel': 0.995274, 'acc_consonant': 0.99617, 'acc_word': 0.993309, 'loss_grapheme': 0.240193, 'loss_vowel': 0.208128, 'loss_consonant': 0.133368, 'loss_word': 0.122518}
    1 | 0.000120 | 160000/160635 | 7.3266 | 10.6809 ||
val: {'recall': 0.991872, 'recall_grapheme': 0.990698, 'recall_vowel': 0.993411, 'recall_consonant': 0.992682, 'recall_word': 0.993069, 'acc_grapheme': 0.990275, 'acc_vowel': 0.990524, 'acc_consonant': 0.981818, 'ac

   21 | 0.000247 | 160000/160635 | 12.6005 | 9.5486 ||
val: {'recall': 0.994709, 'recall_grapheme': 0.993185, 'recall_vowel': 0.99537, 'recall_consonant': 0.997094, 'recall_word': 0.994178, 'acc_grapheme': 0.991966, 'acc_vowel': 0.995622, 'acc_consonant': 0.994827, 'acc_word': 0.99423, 'loss_grapheme': 0.20841, 'loss_vowel': 0.18953, 'loss_consonant': 0.128183, 'loss_word': 0.099562}
   22 | 0.000243 | 160000/160635 | 13.5231 | 9.7619 ||
val: {'recall': 0.99494, 'recall_grapheme': 0.993457, 'recall_vowel': 0.996012, 'recall_consonant': 0.996835, 'recall_word': 0.993631, 'acc_grapheme': 0.992886, 'acc_vowel': 0.996219, 'acc_consonant': 0.997314, 'acc_word': 0.993707, 'loss_grapheme': 0.182655, 'loss_vowel': 0.164524, 'loss_consonant': 0.106536, 'loss_word': 0.09894}
   23 | 0.000238 | 160000/160635 | 1.7195 | 10.0520 ||
val: {'recall': 0.994585, 'recall_grapheme': 0.992382, 'recall_vowel': 0.995836, 'recall_consonant': 0.997742, 'recall_word': 0.994032, 'acc_grapheme': 0.992414, 'acc_vo

KeyboardInterrupt: 

In [38]:
train(args, model)

CYCLE: 1
{'recall': 0.992515, 'recall_grapheme': 0.990199, 'recall_vowel': 0.995289, 'recall_consonant': 0.994371, 'recall_word': 0.989434, 'acc_grapheme': 0.989379, 'acc_vowel': 0.995249, 'acc_consonant': 0.996219, 'acc_word': 0.989504, 'loss_grapheme': 0.180598, 'loss_vowel': 0.129995, 'loss_consonant': 0.083757, 'loss_word': 0.13174}
    0 | 0.000080 | 160000/160635 | 13.4792 | 10.0275 |
val: {'recall': 0.991383, 'recall_grapheme': 0.987877, 'recall_vowel': 0.994628, 'recall_consonant': 0.995149, 'recall_word': 0.989062, 'acc_grapheme': 0.98724, 'acc_vowel': 0.994553, 'acc_consonant': 0.995498, 'acc_word': 0.989106, 'loss_grapheme': 0.297682, 'loss_vowel': 0.239045, 'loss_consonant': 0.1532, 'loss_word': 0.170055}
    1 | 0.000160 | 160000/160635 | 11.0585 | 10.1233 |
val: {'recall': 0.990933, 'recall_grapheme': 0.987167, 'recall_vowel': 0.994776, 'recall_consonant': 0.99462, 'recall_word': 0.98901, 'acc_grapheme': 0.986867, 'acc_vowel': 0.994478, 'acc_consonant': 0.995125, 'acc_wor

   20 | 0.000358 | 160000/160635 | 8.7820 | 9.9976 | |
val: {'recall': 0.992955, 'recall_grapheme': 0.990539, 'recall_vowel': 0.995185, 'recall_consonant': 0.995559, 'recall_word': 0.991181, 'acc_grapheme': 0.989379, 'acc_vowel': 0.995647, 'acc_consonant': 0.996344, 'acc_word': 0.99127, 'loss_grapheme': 0.24776, 'loss_vowel': 0.199267, 'loss_consonant': 0.132784, 'loss_word': 0.147324}
   21 | 0.000354 | 160000/160635 | 7.8672 | 9.7938 ||
val: {'recall': 0.99301, 'recall_grapheme': 0.990275, 'recall_vowel': 0.994856, 'recall_consonant': 0.996636, 'recall_word': 0.990716, 'acc_grapheme': 0.989031, 'acc_vowel': 0.995249, 'acc_consonant': 0.995946, 'acc_word': 0.990723, 'loss_grapheme': 0.254739, 'loss_vowel': 0.201508, 'loss_consonant': 0.127749, 'loss_word': 0.157405}
###>>>>> saved
   22 | 0.000350 | 160000/160635 | 13.4232 | 10.5280 |
val: {'recall': 0.992497, 'recall_grapheme': 0.989834, 'recall_vowel': 0.994887, 'recall_consonant': 0.995432, 'recall_word': 0.991315, 'acc_grapheme': 

   41 | 0.000250 | 160000/160635 | 10.8335 | 9.4911 ||
val: {'recall': 0.99404, 'recall_grapheme': 0.991965, 'recall_vowel': 0.995324, 'recall_consonant': 0.996904, 'recall_word': 0.991899, 'acc_grapheme': 0.991121, 'acc_vowel': 0.995747, 'acc_consonant': 0.996742, 'acc_word': 0.991991, 'loss_grapheme': 0.225798, 'loss_vowel': 0.175805, 'loss_consonant': 0.115398, 'loss_word': 0.133651}
   42 | 0.000244 | 160000/160635 | 12.6424 | 9.3731 ||
val: {'recall': 0.993692, 'recall_grapheme': 0.991466, 'recall_vowel': 0.995279, 'recall_consonant': 0.996557, 'recall_word': 0.991887, 'acc_grapheme': 0.990747, 'acc_vowel': 0.995498, 'acc_consonant': 0.996667, 'acc_word': 0.991892, 'loss_grapheme': 0.231146, 'loss_vowel': 0.185245, 'loss_consonant': 0.118939, 'loss_word': 0.121904}
   43 | 0.000238 | 160000/160635 | 7.5078 | 10.0733 ||
val: {'recall': 0.993566, 'recall_grapheme': 0.991236, 'recall_vowel': 0.995098, 'recall_consonant': 0.996695, 'recall_word': 0.991906, 'acc_grapheme': 0.990648, 'a

###>>>>> saved
   62 | 0.000121 | 160000/160635 | 12.4054 | 9.0314 ||
val: {'recall': 0.993898, 'recall_grapheme': 0.991441, 'recall_vowel': 0.995691, 'recall_consonant': 0.997018, 'recall_word': 0.992546, 'acc_grapheme': 0.991469, 'acc_vowel': 0.99602, 'acc_consonant': 0.997438, 'acc_word': 0.992613, 'loss_grapheme': 0.216529, 'loss_vowel': 0.187651, 'loss_consonant': 0.122937, 'loss_word': 0.118099}
   63 | 0.000115 | 160000/160635 | 10.9154 | 9.4967 ||
val: {'recall': 0.993406, 'recall_grapheme': 0.990455, 'recall_vowel': 0.995609, 'recall_consonant': 0.997106, 'recall_word': 0.992729, 'acc_grapheme': 0.990872, 'acc_vowel': 0.995946, 'acc_consonant': 0.99709, 'acc_word': 0.992712, 'loss_grapheme': 0.212273, 'loss_vowel': 0.174297, 'loss_consonant': 0.11408, 'loss_word': 0.111553}
   64 | 0.000109 | 160000/160635 | 10.9567 | 9.5122 ||
val: {'recall': 0.994446, 'recall_grapheme': 0.992557, 'recall_vowel': 0.995568, 'recall_consonant': 0.997102, 'recall_word': 0.992528, 'acc_grapheme':

   83 | 0.000025 | 160000/160635 | 1.7417 | 9.0950 ||
val: {'recall': 0.994536, 'recall_grapheme': 0.992502, 'recall_vowel': 0.995866, 'recall_consonant': 0.997276, 'recall_word': 0.99306, 'acc_grapheme': 0.99229, 'acc_vowel': 0.996319, 'acc_consonant': 0.997562, 'acc_word': 0.993135, 'loss_grapheme': 0.156126, 'loss_vowel': 0.138185, 'loss_consonant': 0.088311, 'loss_word': 0.096059}
   84 | 0.000022 | 160000/160635 | 11.5469 | 9.0634 |
val: {'recall': 0.994213, 'recall_grapheme': 0.991834, 'recall_vowel': 0.995919, 'recall_consonant': 0.997266, 'recall_word': 0.992682, 'acc_grapheme': 0.991693, 'acc_vowel': 0.996319, 'acc_consonant': 0.997538, 'acc_word': 0.992712, 'loss_grapheme': 0.196601, 'loss_vowel': 0.165296, 'loss_consonant': 0.1079, 'loss_word': 0.119002}
   85 | 0.000019 | 160000/160635 | 10.2474 | 9.4698 ||
val: {'recall': 0.994482, 'recall_grapheme': 0.992396, 'recall_vowel': 0.99588, 'recall_consonant': 0.997256, 'recall_word': 0.992692, 'acc_grapheme': 0.991842, 'acc_vow

    4 | 0.000199 | 160000/160635 | 10.8497 | 9.1854 ||
val: {'recall': 0.994246, 'recall_grapheme': 0.992051, 'recall_vowel': 0.995728, 'recall_consonant': 0.997154, 'recall_word': 0.992182, 'acc_grapheme': 0.991543, 'acc_vowel': 0.996145, 'acc_consonant': 0.997264, 'acc_word': 0.99224, 'loss_grapheme': 0.183527, 'loss_vowel': 0.165026, 'loss_consonant': 0.108609, 'loss_word': 0.10669}
    5 | 0.000238 | 160000/160635 | 0.5802 | 9.3553 ||
val: {'recall': 0.994259, 'recall_grapheme': 0.992047, 'recall_vowel': 0.995899, 'recall_consonant': 0.997044, 'recall_word': 0.992634, 'acc_grapheme': 0.991792, 'acc_vowel': 0.996294, 'acc_consonant': 0.997364, 'acc_word': 0.992712, 'loss_grapheme': 0.185249, 'loss_vowel': 0.157057, 'loss_consonant': 0.105629, 'loss_word': 0.096744}
    6 | 0.000276 | 160000/160635 | 9.2080 | 9.1878 |||
val: {'recall': 0.994189, 'recall_grapheme': 0.991886, 'recall_vowel': 0.995692, 'recall_consonant': 0.997291, 'recall_word': 0.992335, 'acc_grapheme': 0.991195, 'acc

   25 | 0.000337 | 160000/160635 | 11.5827 | 9.1823 ||
val: {'recall': 0.994926, 'recall_grapheme': 0.99306, 'recall_vowel': 0.996299, 'recall_consonant': 0.997286, 'recall_word': 0.992693, 'acc_grapheme': 0.992165, 'acc_vowel': 0.996543, 'acc_consonant': 0.997861, 'acc_word': 0.992712, 'loss_grapheme': 0.157702, 'loss_vowel': 0.12565, 'loss_consonant': 0.086099, 'loss_word': 0.108678}
   26 | 0.000332 | 160000/160635 | 13.6637 | 9.2863 ||
val: {'recall': 0.994934, 'recall_grapheme': 0.993271, 'recall_vowel': 0.996165, 'recall_consonant': 0.997028, 'recall_word': 0.993128, 'acc_grapheme': 0.992687, 'acc_vowel': 0.996692, 'acc_consonant': 0.997662, 'acc_word': 0.993135, 'loss_grapheme': 0.127337, 'loss_vowel': 0.105337, 'loss_consonant': 0.069914, 'loss_word': 0.093382}
   27 | 0.000328 | 160000/160635 | 9.8412 | 9.2383 |||
val: {'recall': 0.99472, 'recall_grapheme': 0.99259, 'recall_vowel': 0.996485, 'recall_consonant': 0.997212, 'recall_word': 0.993065, 'acc_grapheme': 0.992414, 'acc_

   46 | 0.000219 | 160000/160635 | 12.5132 | 9.0792 ||
val: {'recall': 0.995411, 'recall_grapheme': 0.994242, 'recall_vowel': 0.996855, 'recall_consonant': 0.996306, 'recall_word': 0.993697, 'acc_grapheme': 0.993284, 'acc_vowel': 0.99704, 'acc_consonant': 0.997836, 'acc_word': 0.993707, 'loss_grapheme': 0.144529, 'loss_vowel': 0.126438, 'loss_consonant': 0.086878, 'loss_word': 0.092528}
   47 | 0.000213 | 160000/160635 | 2.8598 | 9.3368 |||
val: {'recall': 0.995242, 'recall_grapheme': 0.99384, 'recall_vowel': 0.996306, 'recall_consonant': 0.996984, 'recall_word': 0.993863, 'acc_grapheme': 0.993384, 'acc_vowel': 0.996642, 'acc_consonant': 0.997811, 'acc_word': 0.993881, 'loss_grapheme': 0.152012, 'loss_vowel': 0.133012, 'loss_consonant': 0.09084, 'loss_word': 0.081554}
   48 | 0.000206 | 160000/160635 | 7.2941 | 8.8293 |||
val: {'recall': 0.995287, 'recall_grapheme': 0.993744, 'recall_vowel': 0.996094, 'recall_consonant': 0.997565, 'recall_word': 0.994095, 'acc_grapheme': 0.993309, 'acc

   67 | 0.000093 | 160000/160635 | 8.6760 | 9.0319 ||
val: {'recall': 0.995371, 'recall_grapheme': 0.993666, 'recall_vowel': 0.996861, 'recall_consonant': 0.997291, 'recall_word': 0.99387, 'acc_grapheme': 0.993185, 'acc_vowel': 0.996816, 'acc_consonant': 0.997886, 'acc_word': 0.993956, 'loss_grapheme': 0.126874, 'loss_vowel': 0.121188, 'loss_consonant': 0.085572, 'loss_word': 0.071764}
   68 | 0.000088 | 160000/160635 | 10.0958 | 8.5945 ||
val: {'recall': 0.995642, 'recall_grapheme': 0.994465, 'recall_vowel': 0.996733, 'recall_consonant': 0.996903, 'recall_word': 0.993941, 'acc_grapheme': 0.993757, 'acc_vowel': 0.997065, 'acc_consonant': 0.99806, 'acc_word': 0.994006, 'loss_grapheme': 0.116781, 'loss_vowel': 0.102646, 'loss_consonant': 0.073468, 'loss_word': 0.071353}
   69 | 0.000082 | 160000/160635 | 11.0556 | 8.8254 ||
val: {'recall': 0.995677, 'recall_grapheme': 0.994449, 'recall_vowel': 0.996937, 'recall_consonant': 0.996873, 'recall_word': 0.994316, 'acc_grapheme': 0.993981, 'acc

   88 | 0.000012 | 160000/160635 | 12.0667 | 9.0004 |
val: {'recall': 0.995459, 'recall_grapheme': 0.994115, 'recall_vowel': 0.996638, 'recall_consonant': 0.99697, 'recall_word': 0.994097, 'acc_grapheme': 0.993583, 'acc_vowel': 0.997015, 'acc_consonant': 0.998035, 'acc_word': 0.994105, 'loss_grapheme': 0.130576, 'loss_vowel': 0.123989, 'loss_consonant': 0.088514, 'loss_word': 0.075225}
   89 | 0.000010 | 160000/160635 | 10.1072 | 8.5567 |
val: {'recall': 0.995416, 'recall_grapheme': 0.993983, 'recall_vowel': 0.996734, 'recall_consonant': 0.996963, 'recall_word': 0.993856, 'acc_grapheme': 0.993483, 'acc_vowel': 0.99704, 'acc_consonant': 0.998035, 'acc_word': 0.993906, 'loss_grapheme': 0.123718, 'loss_vowel': 0.118753, 'loss_consonant': 0.087396, 'loss_word': 0.074629}
   90 | 0.000008 | 160000/160635 | 10.3343 | 8.5914 ||
val: {'recall': 0.995254, 'recall_grapheme': 0.993801, 'recall_vowel': 0.996558, 'recall_consonant': 0.996858, 'recall_word': 0.993775, 'acc_grapheme': 0.993459, 'acc_

    9 | 0.000390 | 160000/160635 | 9.8810 | 8.7716 |||
val: {'recall': 0.995799, 'recall_grapheme': 0.994675, 'recall_vowel': 0.996689, 'recall_consonant': 0.997156, 'recall_word': 0.993871, 'acc_grapheme': 0.993931, 'acc_vowel': 0.996916, 'acc_consonant': 0.997911, 'acc_word': 0.993931, 'loss_grapheme': 0.101348, 'loss_vowel': 0.082072, 'loss_consonant': 0.058538, 'loss_word': 0.068742}
   10 | 0.000388 | 160000/160635 | 10.3682 | 8.8818 ||
val: {'recall': 0.995302, 'recall_grapheme': 0.993943, 'recall_vowel': 0.99607, 'recall_consonant': 0.997253, 'recall_word': 0.993646, 'acc_grapheme': 0.993483, 'acc_vowel': 0.996642, 'acc_consonant': 0.997612, 'acc_word': 0.993633, 'loss_grapheme': 0.152051, 'loss_vowel': 0.122484, 'loss_consonant': 0.077517, 'loss_word': 0.09114}
   11 | 0.000386 | 160000/160635 | 8.8178 | 9.1847 |||
val: {'recall': 0.995097, 'recall_grapheme': 0.993417, 'recall_vowel': 0.996199, 'recall_consonant': 0.997357, 'recall_word': 0.993878, 'acc_grapheme': 0.993011, 'ac

KeyboardInterrupt: 

In [38]:
train(args, model)

CYCLE: 1
{'recall': 0.984603, 'recall_grapheme': 0.97713, 'recall_vowel': 0.991312, 'recall_consonant': 0.992841, 'recall_word': 0.97772, 'acc_grapheme': 0.975376, 'acc_vowel': 0.990822, 'acc_consonant': 0.990349, 'acc_word': 0.977764, 'loss_grapheme': 0.377051, 'loss_vowel': 0.251561, 'loss_consonant': 0.166439, 'loss_word': 0.327534}
    0 | 0.000080 | 160000/160635 | 25.9052 | 18.1899 |
val: {'recall': 0.984696, 'recall_grapheme': 0.977376, 'recall_vowel': 0.991461, 'recall_consonant': 0.992571, 'recall_word': 0.978015, 'acc_grapheme': 0.9756, 'acc_vowel': 0.991195, 'acc_consonant': 0.991245, 'acc_word': 0.978038, 'loss_grapheme': 0.340236, 'loss_vowel': 0.240668, 'loss_consonant': 0.161181, 'loss_word': 0.29083}
###>>>>> saved
    1 | 0.000160 | 160000/160635 | 2.3221 | 18.9110 ||
val: {'recall': 0.984781, 'recall_grapheme': 0.978174, 'recall_vowel': 0.991073, 'recall_consonant': 0.991703, 'recall_word': 0.97805, 'acc_grapheme': 0.976694, 'acc_vowel': 0.991319, 'acc_consonant': 0.9

###>>>>> saved
   21 | 0.000354 | 160000/160635 | 15.5916 | 16.6565 |
val: {'recall': 0.98801, 'recall_grapheme': 0.983662, 'recall_vowel': 0.992418, 'recall_consonant': 0.992298, 'recall_word': 0.982332, 'acc_grapheme': 0.981445, 'acc_vowel': 0.992812, 'acc_consonant': 0.992886, 'acc_word': 0.982365, 'loss_grapheme': 0.324178, 'loss_vowel': 0.234505, 'loss_consonant': 0.151151, 'loss_word': 0.243136}
   22 | 0.000350 | 160000/160635 | 23.2937 | 17.4513 |
val: {'recall': 0.988243, 'recall_grapheme': 0.98287, 'recall_vowel': 0.993232, 'recall_consonant': 0.993999, 'recall_word': 0.982236, 'acc_grapheme': 0.981022, 'acc_vowel': 0.993011, 'acc_consonant': 0.993235, 'acc_word': 0.98239, 'loss_grapheme': 0.34441, 'loss_vowel': 0.244334, 'loss_consonant': 0.157811, 'loss_word': 0.282093}
   23 | 0.000346 | 160000/160635 | 16.0969 | 17.3355 |
val: {'recall': 0.987697, 'recall_grapheme': 0.983472, 'recall_vowel': 0.991819, 'recall_consonant': 0.992025, 'recall_word': 0.981513, 'acc_grapheme': 

###>>>>> saved
   42 | 0.000244 | 160000/160635 | 21.5886 | 16.1747 |
val: {'recall': 0.990179, 'recall_grapheme': 0.986513, 'recall_vowel': 0.993908, 'recall_consonant': 0.993783, 'recall_word': 0.986363, 'acc_grapheme': 0.984853, 'acc_vowel': 0.993583, 'acc_consonant': 0.994429, 'acc_word': 0.98637, 'loss_grapheme': 0.29636, 'loss_vowel': 0.225799, 'loss_consonant': 0.150296, 'loss_word': 0.187128}
   43 | 0.000238 | 160000/160635 | 0.9276 | 15.5587 ||
val: {'recall': 0.989957, 'recall_grapheme': 0.986283, 'recall_vowel': 0.993358, 'recall_consonant': 0.993906, 'recall_word': 0.985797, 'acc_grapheme': 0.985126, 'acc_vowel': 0.993856, 'acc_consonant': 0.994478, 'acc_word': 0.985897, 'loss_grapheme': 0.200493, 'loss_vowel': 0.146154, 'loss_consonant': 0.099368, 'loss_word': 0.133185}
   44 | 0.000231 | 160000/160635 | 0.9186 | 15.4935 ||
val: {'recall': 0.989522, 'recall_grapheme': 0.985582, 'recall_vowel': 0.993522, 'recall_consonant': 0.993404, 'recall_word': 0.986566, 'acc_grapheme'

###>>>>> saved
   63 | 0.000115 | 160000/160635 | 21.0869 | 15.4909 |
val: {'recall': 0.991303, 'recall_grapheme': 0.987797, 'recall_vowel': 0.994674, 'recall_consonant': 0.994944, 'recall_word': 0.988234, 'acc_grapheme': 0.987191, 'acc_vowel': 0.994652, 'acc_consonant': 0.99515, 'acc_word': 0.988285, 'loss_grapheme': 0.248291, 'loss_vowel': 0.195017, 'loss_consonant': 0.12395, 'loss_word': 0.15407}
   64 | 0.000109 | 160000/160635 | 17.6079 | 15.3241 |
val: {'recall': 0.991486, 'recall_grapheme': 0.987871, 'recall_vowel': 0.994552, 'recall_consonant': 0.995651, 'recall_word': 0.989058, 'acc_grapheme': 0.986818, 'acc_vowel': 0.994354, 'acc_consonant': 0.99505, 'acc_word': 0.989006, 'loss_grapheme': 0.301503, 'loss_vowel': 0.234748, 'loss_consonant': 0.153272, 'loss_word': 0.16906}
   65 | 0.000104 | 160000/160635 | 11.3319 | 15.2315 |
val: {'recall': 0.991995, 'recall_grapheme': 0.988611, 'recall_vowel': 0.995208, 'recall_consonant': 0.99555, 'recall_word': 0.988736, 'acc_grapheme': 0.

   84 | 0.000022 | 160000/160635 | 19.3180 | 14.6824 |
val: {'recall': 0.991856, 'recall_grapheme': 0.9882, 'recall_vowel': 0.99511, 'recall_consonant': 0.995914, 'recall_word': 0.989211, 'acc_grapheme': 0.987663, 'acc_vowel': 0.995001, 'acc_consonant': 0.995722, 'acc_word': 0.98923, 'loss_grapheme': 0.262447, 'loss_vowel': 0.205642, 'loss_consonant': 0.128688, 'loss_word': 0.160564}
   85 | 0.000019 | 160000/160635 | 16.4641 | 15.8382 |
val: {'recall': 0.99163, 'recall_grapheme': 0.988013, 'recall_vowel': 0.994678, 'recall_consonant': 0.995817, 'recall_word': 0.989342, 'acc_grapheme': 0.987191, 'acc_vowel': 0.994503, 'acc_consonant': 0.995399, 'acc_word': 0.989379, 'loss_grapheme': 0.314966, 'loss_vowel': 0.257951, 'loss_consonant': 0.16325, 'loss_word': 0.172734}
   86 | 0.000016 | 160000/160635 | 13.6084 | 15.3989 |
val: {'recall': 0.991518, 'recall_grapheme': 0.988468, 'recall_vowel': 0.994388, 'recall_consonant': 0.994748, 'recall_word': 0.989141, 'acc_grapheme': 0.987862, 'acc_vo

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc7c8eb40d0>
Traceback (most recent call last):
  File "/home/chec/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 926, in __del__
    self._shutdown_workers()
  File "/home/chec/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 906, in _shutdown_workers
    w.join()
  File "/home/chec/anaconda3/lib/python3.7/multiprocessing/process.py", line 140, in join
    res = self._popen.wait(timeout)
  File "/home/chec/anaconda3/lib/python3.7/multiprocessing/popen_fork.py", line 48, in wait
    return self.poll(os.WNOHANG if timeout == 0.0 else 0)
  File "/home/chec/anaconda3/lib/python3.7/multiprocessing/popen_fork.py", line 28, in poll
    pid, sts = os.waitpid(self.pid, flag)
KeyboardInterrupt: 


KeyboardInterrupt: 

In [None]:
#save_model(model, model_file)

In [42]:
del model