In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet
from cvcore.data.auto_augment import RandAugment
from PIL import Image
from utils import bn_update, moving_average, copy_model


In [2]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [3]:
#!ls /home/chec/data/bengali

In [4]:
DATA_DIR = '/home/chec/data/bengali'

In [5]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [7]:
HEIGHT = 137
WIDTH = 236

In [8]:
#import albumentations as albu
def get_train_augs():
    return RandAugment(n=2, m=27)

In [9]:
#plt.imshow(x)

In [10]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        orig_img = img.copy()
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            #img = augs(image=img)['image']
            img = np.asarray(augs(Image.fromarray(img)))
        
        img = np.expand_dims(img, axis=-1)
        orig_img = np.expand_dims(orig_img, axis=-1)
        
        #print('###', img.shape)
        #img = np.concatenate([img, img, img], 2)
        #print('>>>', img.shape)
        
        # taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
        #MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
        #STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]
        
        img = transforms.functional.to_tensor(img)
        orig_img = transforms.functional.to_tensor(orig_img)
        
        #img = transforms.functional.normalize(img, mean=MEAN, std=STD)
        
        if self.test_mode:
            return img
        elif self.train_mode:
            return img, orig_img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
                    
    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')

    train_df = shuffle(train_df, random_state=1234)

    grapheme_words = np.unique(train_df.grapheme.values)
    grapheme_words_dict = {grapheme: i for i, grapheme in enumerate(grapheme_words)}
    train_df['word_label'] = train_df['grapheme'].map(lambda x: grapheme_words_dict[x])

    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [11]:
#train_loader, val_loader = get_train_val_loaders()

In [12]:
#for x in train_loader:
#    print(x)
#    break

# model

In [13]:
#import pretrainedmodels

In [14]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [15]:
from argparse import Namespace
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d

MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

class BengaliNet(nn.Module):
    """
    EfficientNet B0-B8.
    Args:
        cfg (CfgNode): configs
    """
    def __init__(self, cfg):
        super(BengaliNet, self).__init__()
        model_name = cfg.MODEL_NAME
        pretrained = cfg.PRETRAINED
        input_channels = cfg.IN_CHANNELS
        pool_type = cfg.POOL_TYPE
        drop_connect_rate = cfg.DROP_CONNECT
        self.drop_rate = cfg.DROPOUT
        cls_head = cfg.CLS_HEAD
        num_total_classes = cfg.NUM_GRAPHEME_CLASSES + cfg.NUM_VOWEL_CLASSES + cfg.NUM_CONSONANT_CLASSES \
            + cfg.NUM_WORD_CLASSES

        backbone = timm.create_model(
            model_name=model_name,
            pretrained=pretrained,
            in_chans=input_channels,
            drop_connect_rate=drop_connect_rate,
        )
        self.conv_stem = backbone.conv_stem
        self.bn1 = backbone.bn1
        self.act1 = backbone.act1
        ### Original blocks ###
        for i in range(len((backbone.blocks))):
            setattr(self, "block{}".format(str(i)), backbone.blocks[i])
        self.conv_head = backbone.conv_head
        self.bn2 = backbone.bn2
        self.act2 = backbone.act2
        self.aux_block5 = backbone.blocks[5]
        self.aux_num_features = self.block5[-1].bn3.num_features
        self.aux_head4 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn4 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act4 = Swish()
        self.aux_head5 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn5 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act5 = Swish()
        self.global_pool = SelectAdaptivePool2d(pool_type=pool_type)
        self.num_features = backbone.num_features * self.global_pool.feat_mult()
        assert cls_head == 'linear'
        if cls_head == "linear":
            ### Baseline head ###
            self.fc = nn.Linear(self.num_features, num_total_classes)            
            self.aux_fc1 = nn.Linear(self.aux_num_features*4, num_total_classes)
            self.aux_fc2 = nn.Linear(self.aux_num_features*4, num_total_classes)
            
            for fc in [self.fc, self.aux_fc1, self.aux_fc2]:
                nn.init.zeros_(fc.bias.data)
        elif cls_head == "norm_softmax":
            ### NormSoftmax ###
            self.grapheme_fc = NormSoftmax(self.num_features, num_grapheme_classes)
            self.consonant_fc = NormSoftmax(self.num_features, num_consonant_classes)
            self.vowel_fc = NormSoftmax(self.num_features, num_vowel_classes)
        # Replace with Mish activation
        if cfg.MODEL_ACTIVATION == "mish":
            convert_swish_to_mish(self)
        del backbone

    def _features(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.block0(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x); b4 = x
        x = self.block5(x); b4 = self.aux_block5(b4); b5 = x
        x = self.block6(x)
        x = self.conv_head(x); b4 = self.aux_head4(b4); b5 = self.aux_head5(b5)
        x = self.bn2(x); b4 = self.bn4(b4); b5 = self.bn5(b5)
        x = self.act2(x); b4 = self.act4(b4); b5 = self.act5(b5)
        return b4, b5, x

    def forward(self, x):
        x = F.interpolate(x, size=(380, 380), mode='bilinear', align_corners=False)
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)

        # _, _, x = self._features(x)
        b4, b5, x = self._features(x)
        x = self.global_pool(x); b4 = self.global_pool(b4); b5 = self.global_pool(b5)
        x = torch.flatten(x, 1); b4 = torch.flatten(b4, 1); b5 = torch.flatten(b5, 1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        logits = self.fc(x)
        
        aux_logits1 = self.aux_fc1(b4)
        aux_logits2 = self.aux_fc2(b5)
        
        return logits, aux_logits1, aux_logits2

In [16]:
MODEL_DIR = './model4-ckps'
def create_model(cfg):
    model = BengaliNet(cfg)
    model_file = os.path.join(MODEL_DIR, cfg.MODEL_NAME, cfg.CKP_NAME)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [17]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [18]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [19]:
round(1/9, 6)

0.111111

In [20]:
import numpy as np
import sklearn.metrics
import torch


def calc_metrics(preds0, preds1, preds2, preds3, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2) == len(preds3)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    recall_word = sklearn.metrics.recall_score(preds3, y[:, 3], average='macro')
    
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    metrics['recall_word'] = round(recall_word, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    metrics['acc_word'] = round((preds3 == y[:, 3]).sum() / len(y), 6)    
    
    return metrics

In [21]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    loss3 = F.cross_entropy(outputs[3], y_true[:, 3], reduction='mean')
    
    return loss0 + loss1 + loss2 + loss3 #, loss0.item(), loss1.item(), loss2.item()

In [22]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2, loss3 = 0., 0., 0., 0.
    preds0, preds1, preds2, preds3 = [], [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs, _, _ = model(x)
            outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            preds3.append(torch.max(outputs[3], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            loss3 += F.cross_entropy(outputs[3], y[:, 3], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    preds3 = torch.cat(preds3, 0).cpu().numpy()
    
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, preds3, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    metrics['loss_word'] = round(loss3 / val_loader.num, 6)
    
    return metrics
            

In [23]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [24]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [25]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [26]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [27]:
def rand_bbox_new(size, lam):
    H = size[2]
    W = size[3]

    x_margin_rate = 0.2

    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * (1-x_margin_rate*2) * cut_rat)
    cut_h = np.int(H * cut_rat)
    
    min_x_center = np.int(W * x_margin_rate + cut_w / 2)
    max_x_center = np.int(W * (1-x_margin_rate) - cut_w / 2)
    #print(min_x_center, max_x_center, lam, cut_w)
    min_y_center = cut_h // 2
    max_y_center = H - cut_h // 2
    if max_y_center == min_y_center:
        max_y_center += 1

    # uniform
    cx = np.random.randint(min_x_center, max_x_center)
    cy = np.random.randint(min_y_center, max_y_center)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    
    #print(bbx1, bbx2, bby1, bby2)

    return bbx1, bby1, bbx2, bby2

In [28]:
np.random.random()

0.24730402016717357

In [29]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam
from gridmask import GridMask

In [30]:
from cvcore.solver import WarmupCyclicalLR
def make_optimizer(model, base_lr=4e-4, weight_decay=0., weight_decay_bias=0., epsilon=1e-3):
    """
    Create optimizer with per-layer learning rate and weight decay.
    """
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = base_lr
        params += [{"params": [value], "lr": lr, "weight_decay": weight_decay_bias if 'bias' in key else weight_decay}]
    
    optimizer = torch.optim.AdamW(params, lr, eps=epsilon)
    return optimizer

In [31]:
import time
def train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid):
    train_loss = 0
    bg = time.time()

    for batch_idx, (img, orig_img, targets) in enumerate(train_loader):
        img, orig_img, targets  = img.cuda(), orig_img.cuda(), targets.cuda()
        batch_size = img.size(0)
        r = np.random.rand()

        if r < 0.3:
            # generate mixed sample
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(img.size()[0]).cuda()
            target_a = targets
            target_b = targets[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
            #img[:, :, bby1:bby2, bbx1:bbx2] = img[rand_index, :, bby1:bby2, bbx1:bbx2] #for new cutmix
            img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
            
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
            # compute output
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            loss_aux1 = criterion(outputs_aux1, target_a) * lam + criterion(outputs_aux1, target_b) * (1. - lam)
            loss_aux2 = criterion(outputs_aux2, target_a) * lam + criterion(outputs_aux2, target_b) * (1. - lam)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        elif r > 0.7:
            img = grid(img)
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, targets)
            loss_aux1 = criterion(outputs_aux1, targets)
            loss_aux2 = criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        else:
            orig_img, targets = mixup(orig_img, targets)
            outputs, outputs_aux1, outputs_aux2 = model(orig_img)
            loss_primary = mixup_criterion(outputs, targets)
            loss_aux1 = mixup_criterion(outputs_aux1, targets)
            loss_aux2 = mixup_criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
            #loss = criterion(outputs, targets)

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        #loss.backward()
        lr_scheduler(optimizer, batch_idx, epoch)
        optimizer.step()            
        
        current_lr = get_lrs(optimizer)

        train_loss += loss.item()
        print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} | {:.2f} |'.format(
            epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
            loss.item(), train_loss/(batch_idx+1), (time.time() - bg) / 60), end='')


In [32]:
import copy
best_metrics = 0.
best_metrics_swa = 0.


def validate_and_save(model, model_file, val_loader, save=False):
    global best_metrics
    best_key = 'recall'
    val_metrics = validate(model, val_loader)
    print('\nval:', val_metrics)
    
    if val_metrics[best_key] > best_metrics:
        best_metrics = val_metrics[best_key]
        if save:
            save_model(model, model_file)
            print('###>>>>> saved', model_file)
    model.train()

def validate_and_save_swa(model, model_file, val_loader, save=False):
    global best_metrics_swa
    best_key = 'recall'
    val_metrics = validate(model, val_loader)
    print('\nval:', val_metrics)
    
    if val_metrics[best_key] > best_metrics_swa:
        best_metrics_swa = val_metrics[best_key]
        if save:
            save_model(model, model_file)
            print('###>>>>> saved', model_file)
    model.train()


def train(args):
    model, model_file = create_model(cfg)
    model = model.cuda()

    swa_cfg = copy.deepcopy(cfg)
    swa_cfg.CKP_NAME = cfg.CKP_NAME + '_swa'
    swa_model, swa_model_file = create_model(swa_cfg)
    swa_model = swa_model.cuda()

    optimizer = make_optimizer(model)
    lr_scheduler = WarmupCyclicalLR(
        "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)
    
    [model, swa_model], optimizer = amp.initialize([model, swa_model], optimizer, opt_level="O1",verbosity=0)
    #[model, swa_model], optimizer = amp.initialize(
    #    [model, swa_model], optimizer, opt_level="O2",verbosity=0, keep_batchnorm_fp32=True)
    
    #opt_level="O2", keep_batchnorm_fp32=True
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        swa_model = nn.DataParallel(swa_model)
    
    validate_and_save(model, model_file, val_loader, save=False)

    swa_model_loaded = False
    if os.path.exists(swa_model_file):
        swa_model_loaded = True
        validate_and_save_swa(swa_model, swa_model_file, val_loader, save=False)
    
    for cycle in range(1, args.num_cycles+1):
        print('CYCLE:', cycle)
        grid = GridMask(64, 128, rotate=15, ratio=0.6, mode=1, prob=1.)

        for epoch in range(args.start_epoch, args.num_epochs):
            grid.set_prob(epoch, args.st_epochs)
            train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid)
            validate_and_save(model, model_file, val_loader, save=True)
            
            if (epoch+1) == args.swa_start and cycle == 1:
                if not swa_model_loaded:
                    copy_model(swa_model, model)
                #swa_n = 0
                swa_n = args.swa_n
            if (epoch+1) >= args.swa_start and (epoch+1) % args.swa_freq == 0:
                print('SWA>>>:')
                moving_average(swa_model, model, 1.0 / (swa_n + 1))
                swa_n += 1
                bn_update(train_loader, swa_model)
                validate_and_save_swa(swa_model, swa_model_file, val_loader, save=True)

        #args.base_lr = 2e-4
        #args.num_epochs = 40
        #args.warmup_epochs = 1

        # reset scheduler at each cycle
        lr_scheduler = WarmupCyclicalLR(
            "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)

In [33]:
cfg = Namespace()
cfg.MODEL_NAME = 'tf_efficientnet_b4'
cfg.PRETRAINED = True
cfg.IN_CHANNELS = 1
cfg.POOL_TYPE = 'avg'
cfg.CLS_HEAD = 'linear'
cfg.MODEL_ACTIVATION = 'swish'
cfg.DROP_CONNECT = 0.2
cfg.DROPOUT= 0.
cfg.NUM_WORD_CLASSES = 1295
cfg.NUM_GRAPHEME_CLASSES = 168
cfg.NUM_VOWEL_CLASSES = 11
cfg.NUM_CONSONANT_CLASSES = 7
cfg.CKP_NAME = 'model4_eb4_fold2_380.pth'

In [34]:
#model, model_file = create_model(cfg)
#model(torch.randn(2,1,137,236))[0].size()

In [35]:
args = Namespace()

args.base_lr = 2e-4
args.num_epochs = 60
args.start_epoch = 0
args.warmup_epochs = 5
args.num_cycles = 100
args.batch_size = 128
args.val_batch_size = 512
args.st_epochs = 5

args.swa_start = 2
args.swa_freq = 3
args.swa_n = 3

args.beta = 1.0
args.cutmix_prob = 0.5

In [36]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=2)

(200840, 6)
(200840, 32332)
(160678, 6) (40162, 6)


In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa...


RuntimeError: Caught RuntimeError in replica 1 on device 1.
Original Traceback (most recent call last):
  File "/home/chec/anaconda3/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 60, in _worker
    output = module(*input, **kwargs)
  File "/home/chec/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 547, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/chec/anaconda3/lib/python3.7/site-packages/apex/amp/_initialize.py", line 197, in new_fwd
    **applier(kwargs, input_caster))
  File "<ipython-input-15-5c2f38447e47>", line 93, in forward
    b4, b5, x = self._features(x)
  File "<ipython-input-15-5c2f38447e47>", line 72, in _features
    x = self.conv_stem(x)
  File "/home/chec/anaconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 547, in __call__
    result = self.forward(*input, **kwargs)
  File "/home/chec/anaconda3/lib/python3.7/site-packages/timm/models/conv2d_layers.py", line 67, in forward
    return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
  File "/home/chec/anaconda3/lib/python3.7/site-packages/timm/models/conv2d_layers.py", line 53, in conv2d_same
    return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups)
RuntimeError: Expected tensor for argument #1 'input' to have the same device as tensor for argument #2 'weight'; but device 1 does not equal 0 (while checking arguments for cudnn_convolution)


In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa...

val: {'recall': 0.998635, 'recall_grapheme': 0.998031, 'recall_vowel': 0.999094, 'recall_consonant': 0.999381, 'recall_word': 0.99789, 'acc_grapheme': 0.998008, 'acc_vowel': 0.999178, 'acc_consonant': 0.999303, 'acc_word': 0.997809, 'loss_grapheme': 0.010964, 'loss_vowel': 0.005378, 'loss_consonant': 0.003544, 'loss_word': 0.011642}

val: {'recall': 0.998894, 'recall_grapheme': 0.9984, 'recall_vowel': 0.999307, 'recall_consonant': 0.999469, 'recall_word': 0.998212, 'acc_grapheme': 0.998282, 'acc_vowel': 0.999328, 'acc_consonant': 0.999452, 'acc_word': 0.998182, 'loss_grapheme': 0.008724, 'loss_vowel': 0.004496, 'loss_consonant': 0.002706, 'loss_word': 0.009823}
CYCLE: 1
    0 | 

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa...

val: {'recall': 0.998635, 'recall_grapheme': 0.998031, 'recall_vowel': 0.999094, 'recall_consonant': 0.999381, 'recall_word': 0.99789, 'acc_grapheme': 0.998008, 'acc_vowel': 0.999178, 'acc_consonant': 0.999303, 'acc_word': 0.997809, 'loss_grapheme': 0.010964, 'loss_vowel': 0.005378, 'loss_consonant': 0.003544, 'loss_word': 0.011642}

val: {'recall': 0.998894, 'recall_grapheme': 0.9984, 'recall_vowel': 0.999307, 'recall_consonant': 0.999469, 'recall_word': 0.998212, 'acc_grapheme': 0.998282, 'acc_vowel': 0.999328, 'acc_consonant': 0.999452, 'acc_word': 0.998182, 'loss_grapheme': 0.008724, 'loss_vowel': 0.004496, 'loss_consonant': 0.002706, 'loss_word': 0.009823}
CYCLE: 1
    0 | 

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998153, 'recall_grapheme': 0.997244, 'recall_vowel': 0.998818, 'recall_consonant': 0.999304, 'recall_word': 0.997294, 'acc_grapheme': 0.99741, 'acc_vowel': 0.999054, 'acc_consonant': 0.999278, 'acc_word': 0.997236, 'loss_grapheme': 0.012261, 'loss_vowel': 0.005854, 'loss_consonant': 0.003568, 'loss_word': 0.013006}
SWA>>>:


100%|██████████| 1255/1255 [07:08<00:00,  2.91it/s]



val: {'recall': 0.998664, 'recall_grapheme': 0.99798, 'recall_vowel': 0.99932, 'recall_consonant': 0.999377, 'recall_word': 0.998081, 'acc_grapheme': 0.998182, 'acc_vowel': 0.999353, 'acc_consonant': 0.999402, 'acc_word': 0.998033, 'loss_grapheme': 0.00864, 'loss_vowel': 0.004549, 'loss_consonant': 0.002757, 'loss_word': 0.009867}
    3 | 0.000152 | 134400/160678 | 9.8910 | 5.1330 | 11.73 ||

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa...

val: {'recall': 0.998635, 'recall_grapheme': 0.998031, 'recall_vowel': 0.999094, 'recall_consonant': 0.999381, 'recall_word': 0.99789, 'acc_grapheme': 0.998008, 'acc_vowel': 0.999178, 'acc_consonant': 0.999303, 'acc_word': 0.997809, 'loss_grapheme': 0.010964, 'loss_vowel': 0.005378, 'loss_consonant': 0.003544, 'loss_word': 0.011642}

val: {'recall': 0.99877, 'recall_grapheme': 0.998213, 'recall_vowel': 0.999307, 'recall_consonant': 0.999345, 'recall_word': 0.998201, 'acc_grapheme': 0.998232, 'acc_vowel': 0.999328, 'acc_consonant': 0.999427, 'acc_word': 0.998182, 'loss_grapheme': 0.008726, 'loss_vowel': 0.004444, 'loss_consonant': 0.002708, 'loss_word': 0.009676}
CYCLE: 1
    0 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998362, 'recall_grapheme': 0.99774, 'recall_vowel': 0.998866, 'recall_consonant': 0.999104, 'recall_word': 0.997574, 'acc_grapheme': 0.997759, 'acc_vowel': 0.999104, 'acc_consonant': 0.999278, 'acc_word': 0.99751, 'loss_grapheme': 0.012019, 'loss_vowel': 0.005669, 'loss_consonant': 0.003796, 'loss_word': 0.012675}
SWA>>>:


100%|██████████| 1255/1255 [07:06<00:00,  2.79it/s]



val: {'recall': 0.998835, 'recall_grapheme': 0.99834, 'recall_vowel': 0.999223, 'recall_consonant': 0.999435, 'recall_word': 0.998161, 'acc_grapheme': 0.998207, 'acc_vowel': 0.999303, 'acc_consonant': 0.999427, 'acc_word': 0.998133, 'loss_grapheme': 0.008634, 'loss_vowel': 0.004441, 'loss_consonant': 0.002688, 'loss_word': 0.009663}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
    3 | 0.000099 | 160640/160678 | 5.7569 | 5.0590 | 14.22 ||
val: {'recall': 0.998404, 'recall_grapheme': 0.997578, 'recall_vowel': 0.999013, 'recall_consonant': 0.999446, 'recall_word': 0.997654, 'acc_grapheme': 0.997635, 'acc_vowel': 0.999129, 'acc_consonant': 0.999353, 'acc_word': 0.997585, 'loss_grapheme': 0.010987, 'loss_vowel': 0.005414, 'loss_consonant': 0.003538, 'loss_word': 0.011809}
    4 | 0.000123 | 160640/160678 | 0.1739 | 5.1097 | 14.33 ||
val: {'recall': 0.998144, 'recall_grapheme': 0.997111, 'recall_vowel': 0.999064, 'recall_consonant': 0.999288, 'recall_word': 0

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998297, 'recall_grapheme': 0.997452, 'recall_vowel': 0.999172, 'recall_consonant': 0.999114, 'recall_word': 0.997514, 'acc_grapheme': 0.997535, 'acc_vowel': 0.999104, 'acc_consonant': 0.999178, 'acc_word': 0.997435, 'loss_grapheme': 0.012643, 'loss_vowel': 0.006185, 'loss_consonant': 0.003926, 'loss_word': 0.013328}
SWA>>>:


100%|██████████| 1255/1255 [07:03<00:00,  2.84it/s]



val: {'recall': 0.99881, 'recall_grapheme': 0.998377, 'recall_vowel': 0.999085, 'recall_consonant': 0.999399, 'recall_word': 0.998097, 'acc_grapheme': 0.998232, 'acc_vowel': 0.999278, 'acc_consonant': 0.999378, 'acc_word': 0.998058, 'loss_grapheme': 0.008602, 'loss_vowel': 0.00454, 'loss_consonant': 0.002689, 'loss_word': 0.009655}
    6 | 0.000169 | 160640/160678 | 5.8427 | 5.3537 | 14.42 ||
val: {'recall': 0.997977, 'recall_grapheme': 0.99688, 'recall_vowel': 0.998886, 'recall_consonant': 0.999262, 'recall_word': 0.996985, 'acc_grapheme': 0.997062, 'acc_vowel': 0.998929, 'acc_consonant': 0.999253, 'acc_word': 0.996937, 'loss_grapheme': 0.014331, 'loss_vowel': 0.006769, 'loss_consonant': 0.004349, 'loss_word': 0.014645}
    7 | 0.000191 | 160640/160678 | 0.0991 | 5.5513 | 14.43 ||
val: {'recall': 0.997988, 'recall_grapheme': 0.996773, 'recall_vowel': 0.999041, 'recall_consonant': 0.999368, 'recall_word': 0.997099, 'acc_grapheme': 0.997087, 'acc_vowel': 0.998979, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998026, 'recall_grapheme': 0.997298, 'recall_vowel': 0.998271, 'recall_consonant': 0.999238, 'recall_word': 0.996853, 'acc_grapheme': 0.997186, 'acc_vowel': 0.99873, 'acc_consonant': 0.999029, 'acc_word': 0.996813, 'loss_grapheme': 0.014271, 'loss_vowel': 0.006655, 'loss_consonant': 0.004533, 'loss_word': 0.015287}
SWA>>>:


100%|██████████| 1255/1255 [07:06<00:00,  2.82it/s]



val: {'recall': 0.998691, 'recall_grapheme': 0.998226, 'recall_vowel': 0.998888, 'recall_consonant': 0.999423, 'recall_word': 0.998109, 'acc_grapheme': 0.998182, 'acc_vowel': 0.999253, 'acc_consonant': 0.999378, 'acc_word': 0.998083, 'loss_grapheme': 0.008493, 'loss_vowel': 0.004623, 'loss_consonant': 0.002754, 'loss_word': 0.009615}
    9 | 0.000233 | 160640/160678 | 4.8243 | 5.2710 | 14.51 ||
val: {'recall': 0.997738, 'recall_grapheme': 0.996525, 'recall_vowel': 0.998662, 'recall_consonant': 0.999239, 'recall_word': 0.996476, 'acc_grapheme': 0.996788, 'acc_vowel': 0.998755, 'acc_consonant': 0.998979, 'acc_word': 0.996489, 'loss_grapheme': 0.015549, 'loss_vowel': 0.007562, 'loss_consonant': 0.005052, 'loss_word': 0.016828}
   10 | 0.000230 | 160640/160678 | 6.4223 | 5.4189 | 14.50 ||
val: {'recall': 0.997293, 'recall_grapheme': 0.996757, 'recall_vowel': 0.998689, 'recall_consonant': 0.996971, 'recall_word': 0.996398, 'acc_grapheme': 0.996688, 'acc_vowel': 0.998755, 'acc_consonant': 0

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997804, 'recall_grapheme': 0.997381, 'recall_vowel': 0.999018, 'recall_consonant': 0.997434, 'recall_word': 0.997146, 'acc_grapheme': 0.997435, 'acc_vowel': 0.999054, 'acc_consonant': 0.999129, 'acc_word': 0.997112, 'loss_grapheme': 0.012354, 'loss_vowel': 0.005647, 'loss_consonant': 0.004298, 'loss_word': 0.012441}
SWA>>>:


100%|██████████| 1255/1255 [07:08<00:00,  2.84it/s]



val: {'recall': 0.998688, 'recall_grapheme': 0.99811, 'recall_vowel': 0.999109, 'recall_consonant': 0.999423, 'recall_word': 0.998136, 'acc_grapheme': 0.998182, 'acc_vowel': 0.999303, 'acc_consonant': 0.999378, 'acc_word': 0.998108, 'loss_grapheme': 0.00858, 'loss_vowel': 0.004545, 'loss_consonant': 0.002727, 'loss_word': 0.009684}
   12 | 0.000222 | 160640/160678 | 12.9064 | 5.3147 | 14.52 |
val: {'recall': 0.997297, 'recall_grapheme': 0.996619, 'recall_vowel': 0.998917, 'recall_consonant': 0.997035, 'recall_word': 0.996755, 'acc_grapheme': 0.996539, 'acc_vowel': 0.998904, 'acc_consonant': 0.999029, 'acc_word': 0.996688, 'loss_grapheme': 0.015785, 'loss_vowel': 0.006133, 'loss_consonant': 0.004614, 'loss_word': 0.015385}
   13 | 0.000218 | 160640/160678 | 5.7532 | 5.4844 | 14.50 ||
val: {'recall': 0.997229, 'recall_grapheme': 0.996568, 'recall_vowel': 0.9987, 'recall_consonant': 0.997081, 'recall_word': 0.99627, 'acc_grapheme': 0.996439, 'acc_vowel': 0.99873, 'acc_consonant': 0.99873

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997287, 'recall_grapheme': 0.996385, 'recall_vowel': 0.998603, 'recall_consonant': 0.997776, 'recall_word': 0.996783, 'acc_grapheme': 0.996937, 'acc_vowel': 0.998705, 'acc_consonant': 0.999004, 'acc_word': 0.996738, 'loss_grapheme': 0.01501, 'loss_vowel': 0.006952, 'loss_consonant': 0.004553, 'loss_word': 0.015881}
SWA>>>:


100%|██████████| 1255/1255 [07:05<00:00,  2.85it/s]



val: {'recall': 0.998695, 'recall_grapheme': 0.998134, 'recall_vowel': 0.999109, 'recall_consonant': 0.999401, 'recall_word': 0.998119, 'acc_grapheme': 0.998207, 'acc_vowel': 0.999303, 'acc_consonant': 0.999402, 'acc_word': 0.998083, 'loss_grapheme': 0.008411, 'loss_vowel': 0.004557, 'loss_consonant': 0.002717, 'loss_word': 0.009555}
   15 | 0.000209 | 160640/160678 | 0.0310 | 5.5186 | 14.48 ||
val: {'recall': 0.997954, 'recall_grapheme': 0.99711, 'recall_vowel': 0.998585, 'recall_consonant': 0.999011, 'recall_word': 0.997161, 'acc_grapheme': 0.997311, 'acc_vowel': 0.998755, 'acc_consonant': 0.999104, 'acc_word': 0.997087, 'loss_grapheme': 0.012888, 'loss_vowel': 0.006928, 'loss_consonant': 0.004507, 'loss_word': 0.013909}
   16 | 0.000204 | 160640/160678 | 6.2653 | 5.1914 | 14.51 ||
val: {'recall': 0.997505, 'recall_grapheme': 0.996128, 'recall_vowel': 0.998752, 'recall_consonant': 0.999014, 'recall_word': 0.996921, 'acc_grapheme': 0.996763, 'acc_vowel': 0.998755, 'acc_consonant': 0.

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997696, 'recall_grapheme': 0.99636, 'recall_vowel': 0.998977, 'recall_consonant': 0.999087, 'recall_word': 0.996722, 'acc_grapheme': 0.996763, 'acc_vowel': 0.999054, 'acc_consonant': 0.999178, 'acc_word': 0.996688, 'loss_grapheme': 0.015776, 'loss_vowel': 0.006023, 'loss_consonant': 0.004079, 'loss_word': 0.015886}
SWA>>>:


100%|██████████| 1255/1255 [07:08<00:00,  2.70it/s]



val: {'recall': 0.998713, 'recall_grapheme': 0.998159, 'recall_vowel': 0.999109, 'recall_consonant': 0.999423, 'recall_word': 0.998173, 'acc_grapheme': 0.998257, 'acc_vowel': 0.999303, 'acc_consonant': 0.999378, 'acc_word': 0.998157, 'loss_grapheme': 0.00843, 'loss_vowel': 0.004532, 'loss_consonant': 0.002714, 'loss_word': 0.009534}
   18 | 0.000193 | 160640/160678 | 0.4835 | 5.1266 | 14.57 ||
val: {'recall': 0.997618, 'recall_grapheme': 0.996743, 'recall_vowel': 0.998825, 'recall_consonant': 0.998162, 'recall_word': 0.997383, 'acc_grapheme': 0.997286, 'acc_vowel': 0.999004, 'acc_consonant': 0.999178, 'acc_word': 0.997361, 'loss_grapheme': 0.011219, 'loss_vowel': 0.005272, 'loss_consonant': 0.00354, 'loss_word': 0.012251}
   19 | 0.000188 | 160640/160678 | 0.3892 | 5.3676 | 14.53 ||
val: {'recall': 0.997904, 'recall_grapheme': 0.996896, 'recall_vowel': 0.998584, 'recall_consonant': 0.999238, 'recall_word': 0.996796, 'acc_grapheme': 0.997062, 'acc_vowel': 0.998705, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998052, 'recall_grapheme': 0.997298, 'recall_vowel': 0.998786, 'recall_consonant': 0.998825, 'recall_word': 0.996943, 'acc_grapheme': 0.997037, 'acc_vowel': 0.998755, 'acc_consonant': 0.998929, 'acc_word': 0.996888, 'loss_grapheme': 0.014448, 'loss_vowel': 0.006678, 'loss_consonant': 0.005034, 'loss_word': 0.014347}
SWA>>>:


100%|██████████| 1255/1255 [07:11<00:00,  2.87it/s]



val: {'recall': 0.998676, 'recall_grapheme': 0.998143, 'recall_vowel': 0.999085, 'recall_consonant': 0.999334, 'recall_word': 0.998148, 'acc_grapheme': 0.998257, 'acc_vowel': 0.999278, 'acc_consonant': 0.999378, 'acc_word': 0.998133, 'loss_grapheme': 0.008501, 'loss_vowel': 0.004557, 'loss_consonant': 0.002792, 'loss_word': 0.009579}
   21 | 0.000176 | 160640/160678 | 10.6470 | 5.3034 | 14.53 |
val: {'recall': 0.997495, 'recall_grapheme': 0.996807, 'recall_vowel': 0.998584, 'recall_consonant': 0.997783, 'recall_word': 0.996647, 'acc_grapheme': 0.996439, 'acc_vowel': 0.998755, 'acc_consonant': 0.998855, 'acc_word': 0.996564, 'loss_grapheme': 0.017559, 'loss_vowel': 0.007631, 'loss_consonant': 0.005591, 'loss_word': 0.017103}
   22 | 0.000170 | 160640/160678 | 0.6231 | 5.4009 | 14.49 ||
val: {'recall': 0.997624, 'recall_grapheme': 0.996431, 'recall_vowel': 0.998802, 'recall_consonant': 0.998832, 'recall_word': 0.996984, 'acc_grapheme': 0.997012, 'acc_vowel': 0.99888, 'acc_consonant': 0.

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997836, 'recall_grapheme': 0.996844, 'recall_vowel': 0.998737, 'recall_consonant': 0.998919, 'recall_word': 0.99717, 'acc_grapheme': 0.997336, 'acc_vowel': 0.99883, 'acc_consonant': 0.999054, 'acc_word': 0.997137, 'loss_grapheme': 0.012433, 'loss_vowel': 0.006681, 'loss_consonant': 0.00448, 'loss_word': 0.01352}
SWA>>>:


100%|██████████| 1255/1255 [07:09<00:00,  2.92it/s]



val: {'recall': 0.998638, 'recall_grapheme': 0.998007, 'recall_vowel': 0.999169, 'recall_consonant': 0.999367, 'recall_word': 0.998153, 'acc_grapheme': 0.998182, 'acc_vowel': 0.999303, 'acc_consonant': 0.999402, 'acc_word': 0.998133, 'loss_grapheme': 0.008465, 'loss_vowel': 0.004499, 'loss_consonant': 0.002796, 'loss_word': 0.009531}
   24 | 0.000157 | 160640/160678 | 6.5433 | 5.2514 | 14.51 ||
val: {'recall': 0.997719, 'recall_grapheme': 0.996362, 'recall_vowel': 0.99892, 'recall_consonant': 0.999231, 'recall_word': 0.996919, 'acc_grapheme': 0.997087, 'acc_vowel': 0.998979, 'acc_consonant': 0.999228, 'acc_word': 0.996888, 'loss_grapheme': 0.013398, 'loss_vowel': 0.00608, 'loss_consonant': 0.00423, 'loss_word': 0.013578}
   25 | 0.000151 | 160640/160678 | 8.9119 | 5.2050 | 14.51 ||
val: {'recall': 0.997976, 'recall_grapheme': 0.996898, 'recall_vowel': 0.998985, 'recall_consonant': 0.999123, 'recall_word': 0.997149, 'acc_grapheme': 0.997286, 'acc_vowel': 0.999054, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997916, 'recall_grapheme': 0.996739, 'recall_vowel': 0.999088, 'recall_consonant': 0.999098, 'recall_word': 0.996817, 'acc_grapheme': 0.996913, 'acc_vowel': 0.999004, 'acc_consonant': 0.999228, 'acc_word': 0.996813, 'loss_grapheme': 0.014415, 'loss_vowel': 0.005726, 'loss_consonant': 0.00394, 'loss_word': 0.014763}
SWA>>>:


100%|██████████| 1255/1255 [07:10<00:00,  2.87it/s]



val: {'recall': 0.998661, 'recall_grapheme': 0.998046, 'recall_vowel': 0.999185, 'recall_consonant': 0.999367, 'recall_word': 0.998141, 'acc_grapheme': 0.998257, 'acc_vowel': 0.999303, 'acc_consonant': 0.999402, 'acc_word': 0.998133, 'loss_grapheme': 0.008368, 'loss_vowel': 0.004443, 'loss_consonant': 0.002757, 'loss_word': 0.009435}
   27 | 0.000138 | 160640/160678 | 11.2563 | 5.3930 | 14.50 |
val: {'recall': 0.998212, 'recall_grapheme': 0.997371, 'recall_vowel': 0.999039, 'recall_consonant': 0.999065, 'recall_word': 0.997252, 'acc_grapheme': 0.99746, 'acc_vowel': 0.999004, 'acc_consonant': 0.999203, 'acc_word': 0.997211, 'loss_grapheme': 0.012841, 'loss_vowel': 0.006172, 'loss_consonant': 0.003623, 'loss_word': 0.013512}
   28 | 0.000132 | 160640/160678 | 4.3173 | 5.1741 | 14.55 ||
val: {'recall': 0.997943, 'recall_grapheme': 0.9967, 'recall_vowel': 0.999137, 'recall_consonant': 0.999234, 'recall_word': 0.997096, 'acc_grapheme': 0.997137, 'acc_vowel': 0.999054, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99807, 'recall_grapheme': 0.99712, 'recall_vowel': 0.99894, 'recall_consonant': 0.999102, 'recall_word': 0.997234, 'acc_grapheme': 0.997161, 'acc_vowel': 0.999079, 'acc_consonant': 0.999203, 'acc_word': 0.997186, 'loss_grapheme': 0.013471, 'loss_vowel': 0.005891, 'loss_consonant': 0.003915, 'loss_word': 0.014032}
SWA>>>:


100%|██████████| 1255/1255 [07:10<00:00,  2.83it/s]



val: {'recall': 0.998664, 'recall_grapheme': 0.99806, 'recall_vowel': 0.999169, 'recall_consonant': 0.999367, 'recall_word': 0.998146, 'acc_grapheme': 0.998232, 'acc_vowel': 0.999278, 'acc_consonant': 0.999402, 'acc_word': 0.998133, 'loss_grapheme': 0.008362, 'loss_vowel': 0.004434, 'loss_consonant': 0.002749, 'loss_word': 0.009404}
   30 | 0.000118 | 160640/160678 | 10.6236 | 5.2862 | 14.55 |
val: {'recall': 0.998268, 'recall_grapheme': 0.997479, 'recall_vowel': 0.998972, 'recall_consonant': 0.999142, 'recall_word': 0.99724, 'acc_grapheme': 0.997535, 'acc_vowel': 0.99888, 'acc_consonant': 0.999203, 'acc_word': 0.997211, 'loss_grapheme': 0.012191, 'loss_vowel': 0.006194, 'loss_consonant': 0.003694, 'loss_word': 0.013015}
   31 | 0.000112 | 160640/160678 | 0.0449 | 5.2187 | 14.53 ||
val: {'recall': 0.998297, 'recall_grapheme': 0.997438, 'recall_vowel': 0.999195, 'recall_consonant': 0.999116, 'recall_word': 0.997543, 'acc_grapheme': 0.997635, 'acc_vowel': 0.999104, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997842, 'recall_grapheme': 0.996827, 'recall_vowel': 0.998926, 'recall_consonant': 0.998788, 'recall_word': 0.997023, 'acc_grapheme': 0.997087, 'acc_vowel': 0.998904, 'acc_consonant': 0.999129, 'acc_word': 0.996962, 'loss_grapheme': 0.014925, 'loss_vowel': 0.006424, 'loss_consonant': 0.003951, 'loss_word': 0.014168}
SWA>>>:


100%|██████████| 1255/1255 [07:12<00:00,  2.83it/s]



val: {'recall': 0.998655, 'recall_grapheme': 0.998036, 'recall_vowel': 0.999181, 'recall_consonant': 0.999367, 'recall_word': 0.998071, 'acc_grapheme': 0.998232, 'acc_vowel': 0.999278, 'acc_consonant': 0.999402, 'acc_word': 0.998058, 'loss_grapheme': 0.008365, 'loss_vowel': 0.004428, 'loss_consonant': 0.002735, 'loss_word': 0.009399}
   33 | 0.000099 | 160640/160678 | 5.5048 | 4.9637 | 14.57 ||
val: {'recall': 0.997511, 'recall_grapheme': 0.9963, 'recall_vowel': 0.998547, 'recall_consonant': 0.998896, 'recall_word': 0.996973, 'acc_grapheme': 0.996888, 'acc_vowel': 0.998531, 'acc_consonant': 0.999079, 'acc_word': 0.996937, 'loss_grapheme': 0.014194, 'loss_vowel': 0.007255, 'loss_consonant': 0.004481, 'loss_word': 0.014282}
   34 | 0.000093 | 160640/160678 | 7.2348 | 5.2418 | 14.52 ||
val: {'recall': 0.998416, 'recall_grapheme': 0.997607, 'recall_vowel': 0.999101, 'recall_consonant': 0.99935, 'recall_word': 0.997628, 'acc_grapheme': 0.99756, 'acc_vowel': 0.999029, 'acc_consonant': 0.999

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998287, 'recall_grapheme': 0.997458, 'recall_vowel': 0.998978, 'recall_consonant': 0.999255, 'recall_word': 0.997451, 'acc_grapheme': 0.99751, 'acc_vowel': 0.999004, 'acc_consonant': 0.999303, 'acc_word': 0.997386, 'loss_grapheme': 0.01233, 'loss_vowel': 0.006133, 'loss_consonant': 0.003771, 'loss_word': 0.013084}
SWA>>>:


100%|██████████| 1255/1255 [07:16<00:00,  2.80it/s]



val: {'recall': 0.998687, 'recall_grapheme': 0.9981, 'recall_vowel': 0.999181, 'recall_consonant': 0.999367, 'recall_word': 0.998098, 'acc_grapheme': 0.998257, 'acc_vowel': 0.999278, 'acc_consonant': 0.999402, 'acc_word': 0.998083, 'loss_grapheme': 0.008379, 'loss_vowel': 0.00443, 'loss_consonant': 0.002735, 'loss_word': 0.009427}
   36 | 0.000080 | 160640/160678 | 2.6641 | 4.9609 | 14.59 ||
val: {'recall': 0.997963, 'recall_grapheme': 0.996947, 'recall_vowel': 0.998795, 'recall_consonant': 0.999163, 'recall_word': 0.997358, 'acc_grapheme': 0.997311, 'acc_vowel': 0.998855, 'acc_consonant': 0.999253, 'acc_word': 0.997286, 'loss_grapheme': 0.014549, 'loss_vowel': 0.006712, 'loss_consonant': 0.003693, 'loss_word': 0.014851}
   37 | 0.000074 | 160640/160678 | 6.5058 | 5.1514 | 14.56 ||
val: {'recall': 0.9978, 'recall_grapheme': 0.99668, 'recall_vowel': 0.998709, 'recall_consonant': 0.99913, 'recall_word': 0.996864, 'acc_grapheme': 0.996813, 'acc_vowel': 0.99878, 'acc_consonant': 0.999153,

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998354, 'recall_grapheme': 0.997501, 'recall_vowel': 0.998999, 'recall_consonant': 0.999417, 'recall_word': 0.997551, 'acc_grapheme': 0.997734, 'acc_vowel': 0.999004, 'acc_consonant': 0.999378, 'acc_word': 0.99751, 'loss_grapheme': 0.011244, 'loss_vowel': 0.005993, 'loss_consonant': 0.003242, 'loss_word': 0.012168}
SWA>>>:


 96%|█████████▌| 1200/1255 [06:51<00:18,  2.92it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



   39 | 0.000063 | 160640/160678 | 0.5817 | 5.1170 | 14.59 ||
val: {'recall': 0.998401, 'recall_grapheme': 0.99756, 'recall_vowel': 0.999098, 'recall_consonant': 0.999386, 'recall_word': 0.997519, 'acc_grapheme': 0.99761, 'acc_vowel': 0.998979, 'acc_consonant': 0.999378, 'acc_word': 0.99746, 'loss_grapheme': 0.012282, 'loss_vowel': 0.005884, 'loss_consonant': 0.003171, 'loss_word': 0.01325}
   40 | 0.000057 | 160640/160678 | 4.8003 | 4.9751 | 14.60 ||
val: {'recall': 0.99822, 'recall_grapheme': 0.997178, 'recall_vowel': 0.99909, 'recall_consonant': 0.999434, 'recall_word': 0.997488, 'acc_grapheme': 0.99741, 'acc_vowel': 0.998979, 'acc_consonant': 0.999278, 'acc_word': 0.99741, 'loss_grapheme': 0.012117, 'loss_vowel': 0.00626, 'loss_consonant': 0.003302, 'loss_word': 0.012936}
   41 | 0.000052 | 160640/160678 | 0.0239 | 5.2305 | 14.59 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99828, 'recall_grapheme': 0.997505, 'recall_vowel': 0.998879, 'recall_consonant': 0.999232, 'recall_word': 0.997508, 'acc_grapheme': 0.99756, 'acc_vowel': 0.998904, 'acc_consonant': 0.999328, 'acc_word': 0.997435, 'loss_grapheme': 0.012251, 'loss_vowel': 0.006127, 'loss_consonant': 0.003465, 'loss_word': 0.012766}
SWA>>>:


100%|██████████| 1255/1255 [07:14<00:00,  2.86it/s]



val: {'recall': 0.998665, 'recall_grapheme': 0.998012, 'recall_vowel': 0.999267, 'recall_consonant': 0.999367, 'recall_word': 0.998148, 'acc_grapheme': 0.998232, 'acc_vowel': 0.999278, 'acc_consonant': 0.999402, 'acc_word': 0.998133, 'loss_grapheme': 0.008334, 'loss_vowel': 0.004414, 'loss_consonant': 0.002699, 'loss_word': 0.009401}
   42 | 0.000046 | 160640/160678 | 6.1952 | 4.7572 | 14.60 ||
val: {'recall': 0.998068, 'recall_grapheme': 0.997093, 'recall_vowel': 0.998856, 'recall_consonant': 0.99923, 'recall_word': 0.997205, 'acc_grapheme': 0.997236, 'acc_vowel': 0.998904, 'acc_consonant': 0.999278, 'acc_word': 0.997161, 'loss_grapheme': 0.012601, 'loss_vowel': 0.006109, 'loss_consonant': 0.00349, 'loss_word': 0.013101}
   43 | 0.000041 | 160640/160678 | 3.8122 | 5.1541 | 14.57 ||
val: {'recall': 0.998138, 'recall_grapheme': 0.997338, 'recall_vowel': 0.99885, 'recall_consonant': 0.999025, 'recall_word': 0.99733, 'acc_grapheme': 0.997286, 'acc_vowel': 0.998929, 'acc_consonant': 0.999

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998477, 'recall_grapheme': 0.997627, 'recall_vowel': 0.999149, 'recall_consonant': 0.999504, 'recall_word': 0.997792, 'acc_grapheme': 0.997859, 'acc_vowel': 0.999104, 'acc_consonant': 0.999378, 'acc_word': 0.997734, 'loss_grapheme': 0.010453, 'loss_vowel': 0.005473, 'loss_consonant': 0.003032, 'loss_word': 0.011557}
SWA>>>:


100%|██████████| 1255/1255 [07:11<00:00,  2.69it/s]



val: {'recall': 0.998724, 'recall_grapheme': 0.998133, 'recall_vowel': 0.999292, 'recall_consonant': 0.999339, 'recall_word': 0.998104, 'acc_grapheme': 0.998282, 'acc_vowel': 0.999303, 'acc_consonant': 0.999402, 'acc_word': 0.998083, 'loss_grapheme': 0.008308, 'loss_vowel': 0.004388, 'loss_consonant': 0.002688, 'loss_word': 0.009364}
   45 | 0.000032 | 157312/160678 | 10.9290 | 5.0613 | 14.27 |

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa...

val: {'recall': 0.998635, 'recall_grapheme': 0.998031, 'recall_vowel': 0.999094, 'recall_consonant': 0.999381, 'recall_word': 0.99789, 'acc_grapheme': 0.998008, 'acc_vowel': 0.999178, 'acc_consonant': 0.999303, 'acc_word': 0.997809, 'loss_grapheme': 0.010964, 'loss_vowel': 0.005378, 'loss_consonant': 0.003544, 'loss_word': 0.011642}

val: {'recall': 0.998699, 'recall_grapheme': 0.998145, 'recall_vowel': 0.999195, 'recall_consonant': 0.999311, 'recall_word': 0.998223, 'acc_grapheme': 0.998157, 'acc_vowel': 0.999278, 'acc_consonant': 0.999402, 'acc_word': 0.998207, 'loss_grapheme': 0.00876, 'loss_vowel': 0.004544, 'loss_consonant': 0.002715, 'loss_word': 0.00971}
CYCLE: 1
    0 | 

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997923, 'recall_grapheme': 0.997141, 'recall_vowel': 0.998443, 'recall_consonant': 0.998969, 'recall_word': 0.996917, 'acc_grapheme': 0.996788, 'acc_vowel': 0.998705, 'acc_consonant': 0.998979, 'acc_word': 0.996838, 'loss_grapheme': 0.015744, 'loss_vowel': 0.006867, 'loss_consonant': 0.004946, 'loss_word': 0.015373}
SWA>>>:


100%|██████████| 1255/1255 [07:08<00:00,  2.82it/s]



val: {'recall': 0.99877, 'recall_grapheme': 0.998213, 'recall_vowel': 0.999307, 'recall_consonant': 0.999345, 'recall_word': 0.998201, 'acc_grapheme': 0.998232, 'acc_vowel': 0.999328, 'acc_consonant': 0.999427, 'acc_word': 0.998182, 'loss_grapheme': 0.008726, 'loss_vowel': 0.004444, 'loss_consonant': 0.002708, 'loss_word': 0.009676}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
    2 | 0.000195 | 160640/160678 | 6.1217 | 5.1876 | 13.95 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997975, 'recall_grapheme': 0.997399, 'recall_vowel': 0.998919, 'recall_consonant': 0.998184, 'recall_word': 0.997361, 'acc_grapheme': 0.99741, 'acc_vowel': 0.998929, 'acc_consonant': 0.999129, 'acc_word': 0.997311, 'loss_grapheme': 0.012436, 'loss_vowel': 0.006761, 'loss_consonant': 0.004337, 'loss_word': 0.013756}
SWA>>>:


100%|██████████| 1255/1255 [07:07<00:00,  2.90it/s]



val: {'recall': 0.998696, 'recall_grapheme': 0.998152, 'recall_vowel': 0.999073, 'recall_consonant': 0.999407, 'recall_word': 0.998206, 'acc_grapheme': 0.998157, 'acc_vowel': 0.999253, 'acc_consonant': 0.999427, 'acc_word': 0.998182, 'loss_grapheme': 0.008611, 'loss_vowel': 0.004636, 'loss_consonant': 0.002711, 'loss_word': 0.009674}
    3 | 0.000191 | 160640/160678 | 0.6315 | 5.0384 | 14.05 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99816, 'recall_grapheme': 0.997198, 'recall_vowel': 0.998924, 'recall_consonant': 0.999318, 'recall_word': 0.997163, 'acc_grapheme': 0.997311, 'acc_vowel': 0.998979, 'acc_consonant': 0.999178, 'acc_word': 0.997137, 'loss_grapheme': 0.013195, 'loss_vowel': 0.006487, 'loss_consonant': 0.0043, 'loss_word': 0.013692}
SWA>>>:


100%|██████████| 1255/1255 [07:00<00:00,  2.88it/s]



val: {'recall': 0.998646, 'recall_grapheme': 0.99804, 'recall_vowel': 0.999097, 'recall_consonant': 0.999407, 'recall_word': 0.998208, 'acc_grapheme': 0.998207, 'acc_vowel': 0.999278, 'acc_consonant': 0.999427, 'acc_word': 0.998182, 'loss_grapheme': 0.008511, 'loss_vowel': 0.004617, 'loss_consonant': 0.002616, 'loss_word': 0.009548}
    4 | 0.000187 | 160640/160678 | 5.3596 | 5.2192 | 14.18 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997575, 'recall_grapheme': 0.996647, 'recall_vowel': 0.998802, 'recall_consonant': 0.998205, 'recall_word': 0.997022, 'acc_grapheme': 0.997087, 'acc_vowel': 0.99883, 'acc_consonant': 0.999153, 'acc_word': 0.996962, 'loss_grapheme': 0.013874, 'loss_vowel': 0.007573, 'loss_consonant': 0.004432, 'loss_word': 0.0155}
SWA>>>:


100%|██████████| 1255/1255 [07:06<00:00,  2.83it/s]



val: {'recall': 0.998707, 'recall_grapheme': 0.998073, 'recall_vowel': 0.999181, 'recall_consonant': 0.999499, 'recall_word': 0.99824, 'acc_grapheme': 0.998207, 'acc_vowel': 0.999303, 'acc_consonant': 0.999477, 'acc_word': 0.998207, 'loss_grapheme': 0.008539, 'loss_vowel': 0.004695, 'loss_consonant': 0.002574, 'loss_word': 0.009687}
    5 | 0.000181 | 160640/160678 | 0.1329 | 5.3490 | 14.29 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998107, 'recall_grapheme': 0.997296, 'recall_vowel': 0.998884, 'recall_consonant': 0.99895, 'recall_word': 0.997125, 'acc_grapheme': 0.997311, 'acc_vowel': 0.998929, 'acc_consonant': 0.999079, 'acc_word': 0.997037, 'loss_grapheme': 0.01306, 'loss_vowel': 0.006658, 'loss_consonant': 0.00459, 'loss_word': 0.013936}
SWA>>>:


100%|██████████| 1255/1255 [07:04<00:00,  2.89it/s]



val: {'recall': 0.998659, 'recall_grapheme': 0.997983, 'recall_vowel': 0.99917, 'recall_consonant': 0.999499, 'recall_word': 0.998235, 'acc_grapheme': 0.998232, 'acc_vowel': 0.999278, 'acc_consonant': 0.999477, 'acc_word': 0.998207, 'loss_grapheme': 0.008521, 'loss_vowel': 0.004618, 'loss_consonant': 0.002588, 'loss_word': 0.009628}
    6 | 0.000175 | 150528/160678 | 0.1704 | 5.2703 | 13.41 ||

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa...

val: {'recall': 0.998333, 'recall_grapheme': 0.99753, 'recall_vowel': 0.99891, 'recall_consonant': 0.999362, 'recall_word': 0.997679, 'acc_grapheme': 0.997859, 'acc_vowel': 0.999104, 'acc_consonant': 0.999378, 'acc_word': 0.997585, 'loss_grapheme': 0.011018, 'loss_vowel': 0.005305, 'loss_consonant': 0.003185, 'loss_word': 0.011873}

val: {'recall': 0.998693, 'recall_grapheme': 0.998173, 'recall_vowel': 0.999111, 'recall_consonant': 0.999317, 'recall_word': 0.998115, 'acc_grapheme': 0.998133, 'acc_vowel': 0.999228, 'acc_consonant': 0.999427, 'acc_word': 0.998083, 'loss_grapheme': 0.009206, 'loss_vowel': 0.004678, 'loss_consonant': 0.002865, 'loss_word': 0.010286}
CYCLE: 1
   14 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997335, 'recall_grapheme': 0.995933, 'recall_vowel': 0.998453, 'recall_consonant': 0.999021, 'recall_word': 0.996675, 'acc_grapheme': 0.996489, 'acc_vowel': 0.998655, 'acc_consonant': 0.999004, 'acc_word': 0.996614, 'loss_grapheme': 0.017132, 'loss_vowel': 0.007924, 'loss_consonant': 0.004922, 'loss_word': 0.016749}
SWA>>>:


100%|██████████| 1255/1255 [07:00<00:00,  2.89it/s]



val: {'recall': 0.998679, 'recall_grapheme': 0.998159, 'recall_vowel': 0.999113, 'recall_consonant': 0.999283, 'recall_word': 0.998124, 'acc_grapheme': 0.998182, 'acc_vowel': 0.999203, 'acc_consonant': 0.999402, 'acc_word': 0.998108, 'loss_grapheme': 0.009091, 'loss_vowel': 0.00466, 'loss_consonant': 0.002802, 'loss_word': 0.010135}
   15 | 0.000230 | 160640/160678 | 0.7302 | 5.2954 | 14.26 ||
val: {'recall': 0.997845, 'recall_grapheme': 0.996834, 'recall_vowel': 0.998844, 'recall_consonant': 0.998867, 'recall_word': 0.996814, 'acc_grapheme': 0.996713, 'acc_vowel': 0.99883, 'acc_consonant': 0.999004, 'acc_word': 0.996763, 'loss_grapheme': 0.015115, 'loss_vowel': 0.006485, 'loss_consonant': 0.004953, 'loss_word': 0.015424}
   16 | 0.000222 | 160640/160678 | 2.3889 | 5.3610 | 14.23 ||
val: {'recall': 0.997374, 'recall_grapheme': 0.996045, 'recall_vowel': 0.998492, 'recall_consonant': 0.998916, 'recall_word': 0.996365, 'acc_grapheme': 0.996464, 'acc_vowel': 0.998382, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997296, 'recall_grapheme': 0.996153, 'recall_vowel': 0.99845, 'recall_consonant': 0.998429, 'recall_word': 0.996447, 'acc_grapheme': 0.996514, 'acc_vowel': 0.998805, 'acc_consonant': 0.998929, 'acc_word': 0.996365, 'loss_grapheme': 0.016766, 'loss_vowel': 0.007273, 'loss_consonant': 0.004961, 'loss_word': 0.01594}
SWA>>>:


100%|██████████| 1255/1255 [06:57<00:00,  2.73it/s]



val: {'recall': 0.998645, 'recall_grapheme': 0.998105, 'recall_vowel': 0.999052, 'recall_consonant': 0.999317, 'recall_word': 0.998098, 'acc_grapheme': 0.998133, 'acc_vowel': 0.999228, 'acc_consonant': 0.999427, 'acc_word': 0.998083, 'loss_grapheme': 0.009157, 'loss_vowel': 0.00469, 'loss_consonant': 0.002806, 'loss_word': 0.010149}
   18 | 0.000205 | 160640/160678 | 0.3231 | 5.5125 | 14.21 ||
val: {'recall': 0.997979, 'recall_grapheme': 0.997036, 'recall_vowel': 0.998783, 'recall_consonant': 0.999061, 'recall_word': 0.997073, 'acc_grapheme': 0.996913, 'acc_vowel': 0.999004, 'acc_consonant': 0.999054, 'acc_word': 0.997037, 'loss_grapheme': 0.012922, 'loss_vowel': 0.005699, 'loss_consonant': 0.004861, 'loss_word': 0.013208}
   19 | 0.000196 | 160640/160678 | 15.0756 | 5.1278 | 14.24 |
val: {'recall': 0.997745, 'recall_grapheme': 0.996598, 'recall_vowel': 0.9988, 'recall_consonant': 0.998986, 'recall_word': 0.996517, 'acc_grapheme': 0.99639, 'acc_vowel': 0.998805, 'acc_consonant': 0.999

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997402, 'recall_grapheme': 0.995941, 'recall_vowel': 0.998662, 'recall_consonant': 0.999065, 'recall_word': 0.996625, 'acc_grapheme': 0.996713, 'acc_vowel': 0.99873, 'acc_consonant': 0.999153, 'acc_word': 0.996564, 'loss_grapheme': 0.01558, 'loss_vowel': 0.006996, 'loss_consonant': 0.004196, 'loss_word': 0.016121}
SWA>>>:


100%|██████████| 1255/1255 [07:00<00:00,  2.89it/s]



val: {'recall': 0.998649, 'recall_grapheme': 0.998123, 'recall_vowel': 0.999072, 'recall_consonant': 0.99928, 'recall_word': 0.998153, 'acc_grapheme': 0.998182, 'acc_vowel': 0.999228, 'acc_consonant': 0.999402, 'acc_word': 0.998133, 'loss_grapheme': 0.009019, 'loss_vowel': 0.004649, 'loss_consonant': 0.002781, 'loss_word': 0.010026}
   21 | 0.000178 | 160640/160678 | 0.5266 | 5.2032 | 14.29 ||
val: {'recall': 0.997814, 'recall_grapheme': 0.996566, 'recall_vowel': 0.998904, 'recall_consonant': 0.999222, 'recall_word': 0.99699, 'acc_grapheme': 0.996913, 'acc_vowel': 0.998929, 'acc_consonant': 0.999228, 'acc_word': 0.996913, 'loss_grapheme': 0.014154, 'loss_vowel': 0.006299, 'loss_consonant': 0.004039, 'loss_word': 0.014068}
   22 | 0.000169 | 160640/160678 | 6.6175 | 5.4540 | 14.25 ||
val: {'recall': 0.997519, 'recall_grapheme': 0.996161, 'recall_vowel': 0.998926, 'recall_consonant': 0.998829, 'recall_word': 0.996913, 'acc_grapheme': 0.996489, 'acc_vowel': 0.998855, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998191, 'recall_grapheme': 0.99739, 'recall_vowel': 0.998808, 'recall_consonant': 0.999176, 'recall_word': 0.997629, 'acc_grapheme': 0.997435, 'acc_vowel': 0.999029, 'acc_consonant': 0.999228, 'acc_word': 0.997585, 'loss_grapheme': 0.011968, 'loss_vowel': 0.005496, 'loss_consonant': 0.004013, 'loss_word': 0.012078}
SWA>>>:


100%|██████████| 1255/1255 [07:01<00:00,  2.82it/s]



val: {'recall': 0.998661, 'recall_grapheme': 0.998121, 'recall_vowel': 0.999056, 'recall_consonant': 0.999348, 'recall_word': 0.998177, 'acc_grapheme': 0.998157, 'acc_vowel': 0.999228, 'acc_consonant': 0.999452, 'acc_word': 0.998157, 'loss_grapheme': 0.008959, 'loss_vowel': 0.00458, 'loss_consonant': 0.002757, 'loss_word': 0.009929}
   24 | 0.000150 | 160640/160678 | 14.0821 | 5.3786 | 14.27 |
val: {'recall': 0.997268, 'recall_grapheme': 0.996295, 'recall_vowel': 0.998629, 'recall_consonant': 0.997851, 'recall_word': 0.99665, 'acc_grapheme': 0.996639, 'acc_vowel': 0.998755, 'acc_consonant': 0.999029, 'acc_word': 0.996589, 'loss_grapheme': 0.014659, 'loss_vowel': 0.006548, 'loss_consonant': 0.004481, 'loss_word': 0.014718}
   25 | 0.000141 | 160640/160678 | 0.3014 | 5.2575 | 14.28 ||
val: {'recall': 0.997658, 'recall_grapheme': 0.996281, 'recall_vowel': 0.999041, 'recall_consonant': 0.999029, 'recall_word': 0.997048, 'acc_grapheme': 0.996913, 'acc_vowel': 0.999029, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997941, 'recall_grapheme': 0.996887, 'recall_vowel': 0.998789, 'recall_consonant': 0.999201, 'recall_word': 0.997053, 'acc_grapheme': 0.996913, 'acc_vowel': 0.99883, 'acc_consonant': 0.999153, 'acc_word': 0.996987, 'loss_grapheme': 0.01497, 'loss_vowel': 0.006762, 'loss_consonant': 0.004143, 'loss_word': 0.01535}
SWA>>>:


100%|██████████| 1255/1255 [07:04<00:00,  2.83it/s]



val: {'recall': 0.99866, 'recall_grapheme': 0.998121, 'recall_vowel': 0.999084, 'recall_consonant': 0.999314, 'recall_word': 0.998124, 'acc_grapheme': 0.998157, 'acc_vowel': 0.999253, 'acc_consonant': 0.999427, 'acc_word': 0.998108, 'loss_grapheme': 0.00894, 'loss_vowel': 0.00458, 'loss_consonant': 0.002743, 'loss_word': 0.009877}
   27 | 0.000122 | 160640/160678 | 0.6731 | 5.1430 | 14.33 ||
val: {'recall': 0.997824, 'recall_grapheme': 0.997132, 'recall_vowel': 0.998871, 'recall_consonant': 0.998161, 'recall_word': 0.997137, 'acc_grapheme': 0.997186, 'acc_vowel': 0.998954, 'acc_consonant': 0.999228, 'acc_word': 0.997037, 'loss_grapheme': 0.013161, 'loss_vowel': 0.006296, 'loss_consonant': 0.00392, 'loss_word': 0.013682}
   28 | 0.000113 | 160640/160678 | 3.4613 | 5.3021 | 14.31 ||
val: {'recall': 0.9978, 'recall_grapheme': 0.997051, 'recall_vowel': 0.998997, 'recall_consonant': 0.998102, 'recall_word': 0.997122, 'acc_grapheme': 0.997186, 'acc_vowel': 0.999029, 'acc_consonant': 0.99925

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99815, 'recall_grapheme': 0.997119, 'recall_vowel': 0.998991, 'recall_consonant': 0.999368, 'recall_word': 0.997346, 'acc_grapheme': 0.997062, 'acc_vowel': 0.999104, 'acc_consonant': 0.999353, 'acc_word': 0.997236, 'loss_grapheme': 0.013996, 'loss_vowel': 0.006047, 'loss_consonant': 0.003697, 'loss_word': 0.014135}
SWA>>>:


100%|██████████| 1255/1255 [07:02<00:00,  2.89it/s]



val: {'recall': 0.998684, 'recall_grapheme': 0.998114, 'recall_vowel': 0.999195, 'recall_consonant': 0.999314, 'recall_word': 0.998174, 'acc_grapheme': 0.998133, 'acc_vowel': 0.999278, 'acc_consonant': 0.999427, 'acc_word': 0.998157, 'loss_grapheme': 0.008877, 'loss_vowel': 0.004577, 'loss_consonant': 0.002729, 'loss_word': 0.009808}
   30 | 0.000095 | 160640/160678 | 0.5284 | 4.9906 | 14.30 ||
val: {'recall': 0.998069, 'recall_grapheme': 0.997005, 'recall_vowel': 0.998973, 'recall_consonant': 0.999291, 'recall_word': 0.997226, 'acc_grapheme': 0.997286, 'acc_vowel': 0.999054, 'acc_consonant': 0.999353, 'acc_word': 0.997137, 'loss_grapheme': 0.012121, 'loss_vowel': 0.005826, 'loss_consonant': 0.003594, 'loss_word': 0.012926}
   31 | 0.000086 | 160640/160678 | 7.2720 | 5.0306 | 14.36 ||
val: {'recall': 0.998432, 'recall_grapheme': 0.997733, 'recall_vowel': 0.999009, 'recall_consonant': 0.999252, 'recall_word': 0.997442, 'acc_grapheme': 0.997684, 'acc_vowel': 0.999054, 'acc_consonant': 0

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997701, 'recall_grapheme': 0.996908, 'recall_vowel': 0.9987, 'recall_consonant': 0.998286, 'recall_word': 0.996905, 'acc_grapheme': 0.996913, 'acc_vowel': 0.998805, 'acc_consonant': 0.999004, 'acc_word': 0.996888, 'loss_grapheme': 0.01527, 'loss_vowel': 0.006938, 'loss_consonant': 0.005066, 'loss_word': 0.015668}
SWA>>>:


100%|██████████| 1255/1255 [07:02<00:00,  2.78it/s]



val: {'recall': 0.998675, 'recall_grapheme': 0.998098, 'recall_vowel': 0.999195, 'recall_consonant': 0.999308, 'recall_word': 0.998197, 'acc_grapheme': 0.998133, 'acc_vowel': 0.999278, 'acc_consonant': 0.999402, 'acc_word': 0.998182, 'loss_grapheme': 0.008812, 'loss_vowel': 0.004559, 'loss_consonant': 0.002725, 'loss_word': 0.009755}
   33 | 0.000070 | 160640/160678 | 3.7949 | 5.3404 | 14.29 ||
val: {'recall': 0.998445, 'recall_grapheme': 0.997878, 'recall_vowel': 0.998936, 'recall_consonant': 0.999086, 'recall_word': 0.997539, 'acc_grapheme': 0.997734, 'acc_vowel': 0.999004, 'acc_consonant': 0.999178, 'acc_word': 0.99751, 'loss_grapheme': 0.01196, 'loss_vowel': 0.00603, 'loss_consonant': 0.003903, 'loss_word': 0.012519}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth
   34 | 0.000062 | 160640/160678 | 7.3520 | 5.2046 | 14.35 ||
val: {'recall': 0.998023, 'recall_grapheme': 0.99711, 'recall_vowel': 0.998932, 'recall_consonant': 0.99894, 'recall_word': 0.997441,

  0%|          | 0/1255 [00:00<?, ?it/s]

###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth
SWA>>>:


100%|██████████| 1255/1255 [07:03<00:00,  2.86it/s]



val: {'recall': 0.998699, 'recall_grapheme': 0.998145, 'recall_vowel': 0.999195, 'recall_consonant': 0.999311, 'recall_word': 0.998223, 'acc_grapheme': 0.998157, 'acc_vowel': 0.999278, 'acc_consonant': 0.999402, 'acc_word': 0.998207, 'loss_grapheme': 0.00876, 'loss_vowel': 0.004544, 'loss_consonant': 0.002715, 'loss_word': 0.00971}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   36 | 0.000047 | 160640/160678 | 6.2847 | 4.9712 | 14.31 ||
val: {'recall': 0.997978, 'recall_grapheme': 0.996977, 'recall_vowel': 0.998726, 'recall_consonant': 0.999232, 'recall_word': 0.997248, 'acc_grapheme': 0.997435, 'acc_vowel': 0.998929, 'acc_consonant': 0.999153, 'acc_word': 0.997186, 'loss_grapheme': 0.012675, 'loss_vowel': 0.006403, 'loss_consonant': 0.003889, 'loss_word': 0.013268}
   37 | 0.000041 | 160640/160678 | 0.2475 | 4.9343 | 14.37 ||
val: {'recall': 0.99854, 'recall_grapheme': 0.997905, 'recall_vowel': 0.999032, 'recall_consonant': 0.999316, 'recall_word': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998168, 'recall_grapheme': 0.997426, 'recall_vowel': 0.998948, 'recall_consonant': 0.998871, 'recall_word': 0.997451, 'acc_grapheme': 0.99751, 'acc_vowel': 0.999054, 'acc_consonant': 0.999129, 'acc_word': 0.997361, 'loss_grapheme': 0.01266, 'loss_vowel': 0.006071, 'loss_consonant': 0.003706, 'loss_word': 0.013156}
SWA>>>:


100%|██████████| 1255/1255 [07:07<00:00,  2.84it/s]



val: {'recall': 0.998674, 'recall_grapheme': 0.998093, 'recall_vowel': 0.999195, 'recall_consonant': 0.999317, 'recall_word': 0.998161, 'acc_grapheme': 0.998108, 'acc_vowel': 0.999278, 'acc_consonant': 0.999427, 'acc_word': 0.998133, 'loss_grapheme': 0.008754, 'loss_vowel': 0.004538, 'loss_consonant': 0.002696, 'loss_word': 0.009696}
   39 | 0.000029 | 160640/160678 | 6.3169 | 4.9090 | 14.37 ||
val: {'recall': 0.99814, 'recall_grapheme': 0.997195, 'recall_vowel': 0.998929, 'recall_consonant': 0.999241, 'recall_word': 0.99731, 'acc_grapheme': 0.997386, 'acc_vowel': 0.999079, 'acc_consonant': 0.999203, 'acc_word': 0.997211, 'loss_grapheme': 0.012578, 'loss_vowel': 0.006002, 'loss_consonant': 0.004154, 'loss_word': 0.013199}
   40 | 0.000023 | 160640/160678 | 5.1075 | 5.0208 | 14.37 ||
val: {'recall': 0.998635, 'recall_grapheme': 0.998031, 'recall_vowel': 0.999094, 'recall_consonant': 0.999381, 'recall_word': 0.99789, 'acc_grapheme': 0.998008, 'acc_vowel': 0.999178, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998493, 'recall_grapheme': 0.997845, 'recall_vowel': 0.99894, 'recall_consonant': 0.999342, 'recall_word': 0.997745, 'acc_grapheme': 0.997784, 'acc_vowel': 0.999104, 'acc_consonant': 0.999253, 'acc_word': 0.997684, 'loss_grapheme': 0.011661, 'loss_vowel': 0.005512, 'loss_consonant': 0.00357, 'loss_word': 0.012132}
SWA>>>:


100%|██████████| 1255/1255 [07:06<00:00,  2.88it/s]



val: {'recall': 0.998681, 'recall_grapheme': 0.998093, 'recall_vowel': 0.999195, 'recall_consonant': 0.999345, 'recall_word': 0.998133, 'acc_grapheme': 0.998108, 'acc_vowel': 0.999278, 'acc_consonant': 0.999427, 'acc_word': 0.998108, 'loss_grapheme': 0.008722, 'loss_vowel': 0.004525, 'loss_consonant': 0.002692, 'loss_word': 0.009677}
   42 | 0.000014 | 160640/160678 | 0.2194 | 5.0817 | 14.32 ||
val: {'recall': 0.998551, 'recall_grapheme': 0.997912, 'recall_vowel': 0.998945, 'recall_consonant': 0.999437, 'recall_word': 0.997717, 'acc_grapheme': 0.997834, 'acc_vowel': 0.999104, 'acc_consonant': 0.999303, 'acc_word': 0.997635, 'loss_grapheme': 0.011008, 'loss_vowel': 0.005124, 'loss_consonant': 0.003353, 'loss_word': 0.011561}
   43 | 0.000011 | 160640/160678 | 0.0357 | 5.0163 | 14.37 ||
val: {'recall': 0.998549, 'recall_grapheme': 0.997832, 'recall_vowel': 0.99909, 'recall_consonant': 0.999442, 'recall_word': 0.997813, 'acc_grapheme': 0.997834, 'acc_vowel': 0.999178, 'acc_consonant': 0.

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998461, 'recall_grapheme': 0.997827, 'recall_vowel': 0.998904, 'recall_consonant': 0.999286, 'recall_word': 0.997761, 'acc_grapheme': 0.997734, 'acc_vowel': 0.999029, 'acc_consonant': 0.999278, 'acc_word': 0.997684, 'loss_grapheme': 0.011131, 'loss_vowel': 0.005547, 'loss_consonant': 0.003633, 'loss_word': 0.011709}
SWA>>>:


100%|██████████| 1255/1255 [07:06<00:00,  2.82it/s]



val: {'recall': 0.998684, 'recall_grapheme': 0.998103, 'recall_vowel': 0.999184, 'recall_consonant': 0.999345, 'recall_word': 0.998161, 'acc_grapheme': 0.998133, 'acc_vowel': 0.999253, 'acc_consonant': 0.999427, 'acc_word': 0.998133, 'loss_grapheme': 0.00873, 'loss_vowel': 0.004539, 'loss_consonant': 0.002699, 'loss_word': 0.009694}
   45 | 0.000005 | 160640/160678 | 7.3381 | 5.2069 | 14.30 ||
val: {'recall': 0.998233, 'recall_grapheme': 0.99732, 'recall_vowel': 0.998974, 'recall_consonant': 0.999317, 'recall_word': 0.99744, 'acc_grapheme': 0.997386, 'acc_vowel': 0.999029, 'acc_consonant': 0.999253, 'acc_word': 0.997336, 'loss_grapheme': 0.01287, 'loss_vowel': 0.00583, 'loss_consonant': 0.003914, 'loss_word': 0.013365}
   46 | 0.000005 | 003840/160678 | 6.0223 | 5.7321 | 0.39 ||

KeyboardInterrupt: 

In [38]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa...

val: {'recall': 0.998268, 'recall_grapheme': 0.997585, 'recall_vowel': 0.998839, 'recall_consonant': 0.999061, 'recall_word': 0.997331, 'acc_grapheme': 0.99746, 'acc_vowel': 0.999104, 'acc_consonant': 0.999228, 'acc_word': 0.997286, 'loss_grapheme': 0.012183, 'loss_vowel': 0.005558, 'loss_consonant': 0.0036, 'loss_word': 0.012152}

val: {'recall': 0.99858, 'recall_grapheme': 0.997991, 'recall_vowel': 0.999056, 'recall_consonant': 0.99928, 'recall_word': 0.997955, 'acc_grapheme': 0.998008, 'acc_vowel': 0.999228, 'acc_consonant': 0.999402, 'acc_word': 0.997933, 'loss_grapheme': 0.009319, 'loss_vowel': 0.004726, 'loss_consonant': 0.002979, 'loss_word': 0.010399}
CYCLE: 1
   14 | 0.

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997629, 'recall_grapheme': 0.996466, 'recall_vowel': 0.998614, 'recall_consonant': 0.998971, 'recall_word': 0.996339, 'acc_grapheme': 0.996564, 'acc_vowel': 0.998755, 'acc_consonant': 0.999029, 'acc_word': 0.99629, 'loss_grapheme': 0.015955, 'loss_vowel': 0.006842, 'loss_consonant': 0.004633, 'loss_word': 0.015611}
SWA>>>:


100%|██████████| 1255/1255 [07:19<00:00,  2.87it/s]



val: {'recall': 0.998625, 'recall_grapheme': 0.998087, 'recall_vowel': 0.999044, 'recall_consonant': 0.99928, 'recall_word': 0.997929, 'acc_grapheme': 0.998033, 'acc_vowel': 0.999228, 'acc_consonant': 0.999402, 'acc_word': 0.997908, 'loss_grapheme': 0.009307, 'loss_vowel': 0.004739, 'loss_consonant': 0.002963, 'loss_word': 0.010409}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   15 | 0.000230 | 160640/160678 | 13.9007 | 5.8293 | 14.33 |
val: {'recall': 0.997549, 'recall_grapheme': 0.996796, 'recall_vowel': 0.998846, 'recall_consonant': 0.997757, 'recall_word': 0.996716, 'acc_grapheme': 0.996788, 'acc_vowel': 0.998929, 'acc_consonant': 0.998805, 'acc_word': 0.996688, 'loss_grapheme': 0.017405, 'loss_vowel': 0.009103, 'loss_consonant': 0.007679, 'loss_word': 0.01575}
   16 | 0.000222 | 160640/160678 | 3.7559 | 5.4858 | 14.41 ||
val: {'recall': 0.996871, 'recall_grapheme': 0.996295, 'recall_vowel': 0.998393, 'recall_consonant': 0.9965, 'recall_word': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997155, 'recall_grapheme': 0.995669, 'recall_vowel': 0.998639, 'recall_consonant': 0.998646, 'recall_word': 0.996465, 'acc_grapheme': 0.996514, 'acc_vowel': 0.99878, 'acc_consonant': 0.99878, 'acc_word': 0.99639, 'loss_grapheme': 0.015976, 'loss_vowel': 0.007017, 'loss_consonant': 0.005186, 'loss_word': 0.016498}
SWA>>>:


100%|██████████| 1255/1255 [07:25<00:00,  2.91it/s]



val: {'recall': 0.99863, 'recall_grapheme': 0.998102, 'recall_vowel': 0.999068, 'recall_consonant': 0.99925, 'recall_word': 0.99799, 'acc_grapheme': 0.998083, 'acc_vowel': 0.999253, 'acc_consonant': 0.999378, 'acc_word': 0.997983, 'loss_grapheme': 0.009297, 'loss_vowel': 0.004718, 'loss_consonant': 0.002913, 'loss_word': 0.010393}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   18 | 0.000205 | 160640/160678 | 5.4541 | 5.4631 | 14.44 ||
val: {'recall': 0.997027, 'recall_grapheme': 0.996197, 'recall_vowel': 0.998418, 'recall_consonant': 0.997297, 'recall_word': 0.996598, 'acc_grapheme': 0.996539, 'acc_vowel': 0.99873, 'acc_consonant': 0.998904, 'acc_word': 0.996614, 'loss_grapheme': 0.015664, 'loss_vowel': 0.006621, 'loss_consonant': 0.005375, 'loss_word': 0.01569}
   19 | 0.000196 | 160640/160678 | 3.6985 | 5.3618 | 14.41 ||
val: {'recall': 0.996948, 'recall_grapheme': 0.996174, 'recall_vowel': 0.998897, 'recall_consonant': 0.996547, 'recall_word': 0.996

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



   20 | 0.000187 | 160640/160678 | 11.2710 | 5.4096 | 14.47 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998071, 'recall_grapheme': 0.997315, 'recall_vowel': 0.998682, 'recall_consonant': 0.998971, 'recall_word': 0.996768, 'acc_grapheme': 0.996962, 'acc_vowel': 0.998705, 'acc_consonant': 0.998929, 'acc_word': 0.996713, 'loss_grapheme': 0.015383, 'loss_vowel': 0.007967, 'loss_consonant': 0.0055, 'loss_word': 0.01498}
SWA>>>:


100%|██████████| 1255/1255 [07:31<00:00,  2.93it/s]



val: {'recall': 0.998663, 'recall_grapheme': 0.998182, 'recall_vowel': 0.999128, 'recall_consonant': 0.99916, 'recall_word': 0.998067, 'acc_grapheme': 0.998108, 'acc_vowel': 0.999253, 'acc_consonant': 0.999378, 'acc_word': 0.998033, 'loss_grapheme': 0.009251, 'loss_vowel': 0.004671, 'loss_consonant': 0.0029, 'loss_word': 0.010318}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   21 | 0.000178 | 160640/160678 | 12.5309 | 5.5076 | 14.43 |
val: {'recall': 0.997697, 'recall_grapheme': 0.996614, 'recall_vowel': 0.998612, 'recall_consonant': 0.99895, 'recall_word': 0.996739, 'acc_grapheme': 0.996838, 'acc_vowel': 0.998805, 'acc_consonant': 0.999004, 'acc_word': 0.996688, 'loss_grapheme': 0.015249, 'loss_vowel': 0.007159, 'loss_consonant': 0.004438, 'loss_word': 0.015519}
   22 | 0.000169 | 160640/160678 | 0.2651 | 5.2955 | 14.52 ||
val: {'recall': 0.997539, 'recall_grapheme': 0.996806, 'recall_vowel': 0.998602, 'recall_consonant': 0.997944, 'recall_word': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997331, 'recall_grapheme': 0.996773, 'recall_vowel': 0.998477, 'recall_consonant': 0.997302, 'recall_word': 0.99679, 'acc_grapheme': 0.997112, 'acc_vowel': 0.998556, 'acc_consonant': 0.998855, 'acc_word': 0.996763, 'loss_grapheme': 0.014144, 'loss_vowel': 0.007499, 'loss_consonant': 0.004817, 'loss_word': 0.014549}
SWA>>>:


100%|██████████| 1255/1255 [07:30<00:00,  2.70it/s]



val: {'recall': 0.998675, 'recall_grapheme': 0.998165, 'recall_vowel': 0.999115, 'recall_consonant': 0.999255, 'recall_word': 0.998068, 'acc_grapheme': 0.998108, 'acc_vowel': 0.999228, 'acc_consonant': 0.999402, 'acc_word': 0.998033, 'loss_grapheme': 0.00917, 'loss_vowel': 0.004674, 'loss_consonant': 0.002878, 'loss_word': 0.010282}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   24 | 0.000150 | 160640/160678 | 6.6417 | 5.2441 | 14.46 ||
val: {'recall': 0.997259, 'recall_grapheme': 0.996574, 'recall_vowel': 0.998941, 'recall_consonant': 0.996949, 'recall_word': 0.997221, 'acc_grapheme': 0.997037, 'acc_vowel': 0.999029, 'acc_consonant': 0.999054, 'acc_word': 0.997112, 'loss_grapheme': 0.013763, 'loss_vowel': 0.0062, 'loss_consonant': 0.0046, 'loss_word': 0.013484}
   25 | 0.000141 | 160640/160678 | 0.6903 | 5.4493 | 14.42 ||
val: {'recall': 0.997648, 'recall_grapheme': 0.9968, 'recall_vowel': 0.998817, 'recall_consonant': 0.998173, 'recall_word': 0.99719

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997928, 'recall_grapheme': 0.996799, 'recall_vowel': 0.998909, 'recall_consonant': 0.999204, 'recall_word': 0.997057, 'acc_grapheme': 0.997037, 'acc_vowel': 0.99888, 'acc_consonant': 0.999203, 'acc_word': 0.996987, 'loss_grapheme': 0.014454, 'loss_vowel': 0.006616, 'loss_consonant': 0.00442, 'loss_word': 0.014306}
SWA>>>:


100%|██████████| 1255/1255 [07:27<00:00,  2.86it/s]



val: {'recall': 0.998674, 'recall_grapheme': 0.998173, 'recall_vowel': 0.9991, 'recall_consonant': 0.99925, 'recall_word': 0.99809, 'acc_grapheme': 0.998133, 'acc_vowel': 0.999203, 'acc_consonant': 0.999378, 'acc_word': 0.998058, 'loss_grapheme': 0.009199, 'loss_vowel': 0.004672, 'loss_consonant': 0.002878, 'loss_word': 0.010275}
   27 | 0.000122 | 160640/160678 | 5.5732 | 5.3044 | 14.48 ||
val: {'recall': 0.997805, 'recall_grapheme': 0.996674, 'recall_vowel': 0.998666, 'recall_consonant': 0.999204, 'recall_word': 0.997077, 'acc_grapheme': 0.997112, 'acc_vowel': 0.998755, 'acc_consonant': 0.999203, 'acc_word': 0.996987, 'loss_grapheme': 0.013126, 'loss_vowel': 0.006107, 'loss_consonant': 0.004092, 'loss_word': 0.013229}
   28 | 0.000113 | 160640/160678 | 6.8696 | 5.3479 | 14.44 ||
val: {'recall': 0.997893, 'recall_grapheme': 0.996795, 'recall_vowel': 0.998771, 'recall_consonant': 0.999213, 'recall_word': 0.997119, 'acc_grapheme': 0.997012, 'acc_vowel': 0.99888, 'acc_consonant': 0.9992

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997129, 'recall_grapheme': 0.995879, 'recall_vowel': 0.998592, 'recall_consonant': 0.998166, 'recall_word': 0.996444, 'acc_grapheme': 0.996464, 'acc_vowel': 0.998606, 'acc_consonant': 0.998805, 'acc_word': 0.99634, 'loss_grapheme': 0.017397, 'loss_vowel': 0.007623, 'loss_consonant': 0.005113, 'loss_word': 0.016864}
SWA>>>:


100%|██████████| 1255/1255 [07:26<00:00,  2.76it/s]



val: {'recall': 0.998693, 'recall_grapheme': 0.998173, 'recall_vowel': 0.999111, 'recall_consonant': 0.999317, 'recall_word': 0.998115, 'acc_grapheme': 0.998133, 'acc_vowel': 0.999228, 'acc_consonant': 0.999427, 'acc_word': 0.998083, 'loss_grapheme': 0.009206, 'loss_vowel': 0.004678, 'loss_consonant': 0.002865, 'loss_word': 0.010286}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   30 | 0.000095 | 160640/160678 | 0.1409 | 5.0472 | 14.55 ||
val: {'recall': 0.997939, 'recall_grapheme': 0.997178, 'recall_vowel': 0.998883, 'recall_consonant': 0.998517, 'recall_word': 0.997268, 'acc_grapheme': 0.997336, 'acc_vowel': 0.998904, 'acc_consonant': 0.999203, 'acc_word': 0.997161, 'loss_grapheme': 0.013143, 'loss_vowel': 0.00621, 'loss_consonant': 0.003996, 'loss_word': 0.013588}
   31 | 0.000086 | 160640/160678 | 5.3479 | 5.3397 | 14.44 ||
val: {'recall': 0.997589, 'recall_grapheme': 0.996406, 'recall_vowel': 0.998638, 'recall_consonant': 0.998904, 'recall_word': 0

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997661, 'recall_grapheme': 0.996447, 'recall_vowel': 0.998589, 'recall_consonant': 0.999161, 'recall_word': 0.997075, 'acc_grapheme': 0.996937, 'acc_vowel': 0.998705, 'acc_consonant': 0.999153, 'acc_word': 0.997012, 'loss_grapheme': 0.014886, 'loss_vowel': 0.007092, 'loss_consonant': 0.004173, 'loss_word': 0.014879}
SWA>>>:


 82%|████████▏ | 1025/1255 [06:05<01:23,  2.74it/s]


val: {'recall': 0.998631, 'recall_grapheme': 0.99809, 'recall_vowel': 0.999026, 'recall_consonant': 0.999317, 'recall_word': 0.998066, 'acc_grapheme': 0.998108, 'acc_vowel': 0.999203, 'acc_consonant': 0.999427, 'acc_word': 0.998033, 'loss_grapheme': 0.009239, 'loss_vowel': 0.004681, 'loss_consonant': 0.002872, 'loss_word': 0.010289}
   33 | 0.000070 | 160640/160678 | 6.6393 | 5.3573 | 14.43 ||
val: {'recall': 0.997507, 'recall_grapheme': 0.996098, 'recall_vowel': 0.998758, 'recall_consonant': 0.999075, 'recall_word': 0.996947, 'acc_grapheme': 0.996763, 'acc_vowel': 0.99888, 'acc_consonant': 0.999153, 'acc_word': 0.996838, 'loss_grapheme': 0.014728, 'loss_vowel': 0.006658, 'loss_consonant': 0.004127, 'loss_word': 0.014681}
   34 | 0.000062 | 160640/160678 | 12.7309 | 5.4813 | 14.47 |
val: {'recall': 0.997559, 'recall_grapheme': 0.996341, 'recall_vowel': 0.998682, 'recall_consonant': 0.998873, 'recall_word': 0.996883, 'acc_grapheme': 0.997137, 'acc_vowel': 0.99883, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99778, 'recall_grapheme': 0.996646, 'recall_vowel': 0.998771, 'recall_consonant': 0.999055, 'recall_word': 0.997545, 'acc_grapheme': 0.997236, 'acc_vowel': 0.99888, 'acc_consonant': 0.999178, 'acc_word': 0.99746, 'loss_grapheme': 0.01372, 'loss_vowel': 0.006417, 'loss_consonant': 0.004061, 'loss_word': 0.013845}
SWA>>>:


100%|██████████| 1255/1255 [07:29<00:00,  2.82it/s]



val: {'recall': 0.998675, 'recall_grapheme': 0.998149, 'recall_vowel': 0.999055, 'recall_consonant': 0.999347, 'recall_word': 0.99809, 'acc_grapheme': 0.998157, 'acc_vowel': 0.999253, 'acc_consonant': 0.999452, 'acc_word': 0.998058, 'loss_grapheme': 0.00924, 'loss_vowel': 0.00472, 'loss_consonant': 0.002889, 'loss_word': 0.010288}
   36 | 0.000047 | 160640/160678 | 0.1079 | 5.0609 | 14.50 ||
val: {'recall': 0.998157, 'recall_grapheme': 0.997147, 'recall_vowel': 0.999004, 'recall_consonant': 0.99933, 'recall_word': 0.997362, 'acc_grapheme': 0.997236, 'acc_vowel': 0.999029, 'acc_consonant': 0.999253, 'acc_word': 0.997286, 'loss_grapheme': 0.013283, 'loss_vowel': 0.005893, 'loss_consonant': 0.003939, 'loss_word': 0.01377}
   37 | 0.000041 | 160640/160678 | 0.2853 | 5.3799 | 14.47 ||
val: {'recall': 0.997933, 'recall_grapheme': 0.997307, 'recall_vowel': 0.998944, 'recall_consonant': 0.998175, 'recall_word': 0.997476, 'acc_grapheme': 0.997585, 'acc_vowel': 0.999004, 'acc_consonant': 0.9993

100%|██████████| 1255/1255 [07:30<00:00,  2.86it/s]



val: {'recall': 0.998663, 'recall_grapheme': 0.998131, 'recall_vowel': 0.999042, 'recall_consonant': 0.999347, 'recall_word': 0.998063, 'acc_grapheme': 0.998133, 'acc_vowel': 0.999228, 'acc_consonant': 0.999452, 'acc_word': 0.998033, 'loss_grapheme': 0.009242, 'loss_vowel': 0.004746, 'loss_consonant': 0.002904, 'loss_word': 0.010307}
   40 | 0.000023 | 160640/160678 | 9.8059 | 5.0913 | 14.48 ||
val: {'recall': 0.997666, 'recall_grapheme': 0.997019, 'recall_vowel': 0.998793, 'recall_consonant': 0.997832, 'recall_word': 0.997305, 'acc_grapheme': 0.997161, 'acc_vowel': 0.998904, 'acc_consonant': 0.999129, 'acc_word': 0.997211, 'loss_grapheme': 0.014669, 'loss_vowel': 0.006743, 'loss_consonant': 0.004553, 'loss_word': 0.014631}
   41 | 0.000019 | 160640/160678 | 9.5720 | 5.1584 | 14.44 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998077, 'recall_grapheme': 0.997759, 'recall_vowel': 0.998751, 'recall_consonant': 0.998038, 'recall_word': 0.997716, 'acc_grapheme': 0.997734, 'acc_vowel': 0.998954, 'acc_consonant': 0.999203, 'acc_word': 0.997635, 'loss_grapheme': 0.011949, 'loss_vowel': 0.005873, 'loss_consonant': 0.00346, 'loss_word': 0.012346}
SWA>>>:


100%|██████████| 1255/1255 [07:31<00:00,  2.73it/s]



val: {'recall': 0.998681, 'recall_grapheme': 0.998168, 'recall_vowel': 0.999042, 'recall_consonant': 0.999347, 'recall_word': 0.998105, 'acc_grapheme': 0.998182, 'acc_vowel': 0.999228, 'acc_consonant': 0.999452, 'acc_word': 0.998083, 'loss_grapheme': 0.009251, 'loss_vowel': 0.004735, 'loss_consonant': 0.002906, 'loss_word': 0.010311}
   42 | 0.000014 | 160640/160678 | 5.5592 | 4.8911 | 14.57 ||
val: {'recall': 0.998099, 'recall_grapheme': 0.997771, 'recall_vowel': 0.998806, 'recall_consonant': 0.998049, 'recall_word': 0.997687, 'acc_grapheme': 0.997784, 'acc_vowel': 0.998954, 'acc_consonant': 0.999253, 'acc_word': 0.99761, 'loss_grapheme': 0.011273, 'loss_vowel': 0.006081, 'loss_consonant': 0.003277, 'loss_word': 0.011993}
   43 | 0.000011 | 160640/160678 | 4.8980 | 4.8186 | 14.49 ||
val: {'recall': 0.997788, 'recall_grapheme': 0.997215, 'recall_vowel': 0.998765, 'recall_consonant': 0.997957, 'recall_word': 0.997463, 'acc_grapheme': 0.99741, 'acc_vowel': 0.998904, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998225, 'recall_grapheme': 0.997497, 'recall_vowel': 0.998799, 'recall_consonant': 0.999109, 'recall_word': 0.997708, 'acc_grapheme': 0.997759, 'acc_vowel': 0.998929, 'acc_consonant': 0.999303, 'acc_word': 0.99761, 'loss_grapheme': 0.012283, 'loss_vowel': 0.006046, 'loss_consonant': 0.003455, 'loss_word': 0.012875}
SWA>>>:


100%|██████████| 1255/1255 [07:29<00:00,  2.84it/s]



val: {'recall': 0.998678, 'recall_grapheme': 0.998178, 'recall_vowel': 0.999042, 'recall_consonant': 0.999314, 'recall_word': 0.998099, 'acc_grapheme': 0.998207, 'acc_vowel': 0.999228, 'acc_consonant': 0.999427, 'acc_word': 0.998083, 'loss_grapheme': 0.009247, 'loss_vowel': 0.004746, 'loss_consonant': 0.002901, 'loss_word': 0.010305}
   45 | 0.000005 | 160640/160678 | 0.4216 | 5.1773 | 14.50 ||
val: {'recall': 0.998333, 'recall_grapheme': 0.99753, 'recall_vowel': 0.99891, 'recall_consonant': 0.999362, 'recall_word': 0.997679, 'acc_grapheme': 0.997859, 'acc_vowel': 0.999104, 'acc_consonant': 0.999378, 'acc_word': 0.997585, 'loss_grapheme': 0.011018, 'loss_vowel': 0.005305, 'loss_consonant': 0.003185, 'loss_word': 0.011873}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth
   46 | 0.000003 | 160640/160678 | 5.0941 | 5.1105 | 14.48 ||
val: {'recall': 0.997824, 'recall_grapheme': 0.996936, 'recall_vowel': 0.99864, 'recall_consonant': 0.998785, 'recall_word': 0.99735

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997973, 'recall_grapheme': 0.997041, 'recall_vowel': 0.998768, 'recall_consonant': 0.999043, 'recall_word': 0.997601, 'acc_grapheme': 0.997336, 'acc_vowel': 0.998904, 'acc_consonant': 0.999228, 'acc_word': 0.997485, 'loss_grapheme': 0.013219, 'loss_vowel': 0.006202, 'loss_consonant': 0.003792, 'loss_word': 0.013352}
SWA>>>:


100%|██████████| 1255/1255 [07:29<00:00,  2.85it/s]



val: {'recall': 0.998678, 'recall_grapheme': 0.998178, 'recall_vowel': 0.999042, 'recall_consonant': 0.999314, 'recall_word': 0.998126, 'acc_grapheme': 0.998207, 'acc_vowel': 0.999228, 'acc_consonant': 0.999427, 'acc_word': 0.998108, 'loss_grapheme': 0.009265, 'loss_vowel': 0.004755, 'loss_consonant': 0.002907, 'loss_word': 0.010317}
   48 | 0.000000 | 160640/160678 | 0.1413 | 5.0579 | 14.49 ||
val: {'recall': 0.998158, 'recall_grapheme': 0.997671, 'recall_vowel': 0.999014, 'recall_consonant': 0.998276, 'recall_word': 0.997761, 'acc_grapheme': 0.997809, 'acc_vowel': 0.999079, 'acc_consonant': 0.999353, 'acc_word': 0.997659, 'loss_grapheme': 0.010846, 'loss_vowel': 0.005292, 'loss_consonant': 0.003221, 'loss_word': 0.011692}
   49 | 0.000000 | 160640/160678 | 3.5076 | 5.0581 | 14.48 ||
val: {'recall': 0.998133, 'recall_grapheme': 0.997196, 'recall_vowel': 0.998812, 'recall_consonant': 0.999328, 'recall_word': 0.997427, 'acc_grapheme': 0.997311, 'acc_vowel': 0.998929, 'acc_consonant': 0

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997136, 'recall_grapheme': 0.99654, 'recall_vowel': 0.998793, 'recall_consonant': 0.996672, 'recall_word': 0.996705, 'acc_grapheme': 0.996589, 'acc_vowel': 0.99873, 'acc_consonant': 0.998929, 'acc_word': 0.996639, 'loss_grapheme': 0.015674, 'loss_vowel': 0.007459, 'loss_consonant': 0.00529, 'loss_word': 0.016239}
SWA>>>:


100%|██████████| 1255/1255 [07:29<00:00,  2.86it/s]



val: {'recall': 0.998683, 'recall_grapheme': 0.998168, 'recall_vowel': 0.999083, 'recall_consonant': 0.999314, 'recall_word': 0.99812, 'acc_grapheme': 0.998182, 'acc_vowel': 0.999278, 'acc_consonant': 0.999427, 'acc_word': 0.998108, 'loss_grapheme': 0.009257, 'loss_vowel': 0.004745, 'loss_consonant': 0.002933, 'loss_word': 0.010313}
   15 | 0.000230 | 160640/160678 | 0.5164 | 5.2248 | 14.51 ||
val: {'recall': 0.997882, 'recall_grapheme': 0.997067, 'recall_vowel': 0.998849, 'recall_consonant': 0.998545, 'recall_word': 0.996845, 'acc_grapheme': 0.997037, 'acc_vowel': 0.99883, 'acc_consonant': 0.998979, 'acc_word': 0.996788, 'loss_grapheme': 0.014204, 'loss_vowel': 0.006026, 'loss_consonant': 0.004558, 'loss_word': 0.014394}
   16 | 0.000222 | 160640/160678 | 6.1086 | 5.2927 | 14.62 ||
val: {'recall': 0.997296, 'recall_grapheme': 0.995805, 'recall_vowel': 0.99871, 'recall_consonant': 0.998863, 'recall_word': 0.996004, 'acc_grapheme': 0.996091, 'acc_vowel': 0.99873, 'acc_consonant': 0.998

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997579, 'recall_grapheme': 0.996901, 'recall_vowel': 0.998452, 'recall_consonant': 0.998059, 'recall_word': 0.996906, 'acc_grapheme': 0.997087, 'acc_vowel': 0.998755, 'acc_consonant': 0.999029, 'acc_word': 0.996788, 'loss_grapheme': 0.013683, 'loss_vowel': 0.006352, 'loss_consonant': 0.004681, 'loss_word': 0.014654}
SWA>>>:


100%|██████████| 1255/1255 [07:36<00:00,  2.70it/s]



val: {'recall': 0.998665, 'recall_grapheme': 0.998145, 'recall_vowel': 0.999083, 'recall_consonant': 0.999286, 'recall_word': 0.998094, 'acc_grapheme': 0.998157, 'acc_vowel': 0.999278, 'acc_consonant': 0.999427, 'acc_word': 0.998083, 'loss_grapheme': 0.009226, 'loss_vowel': 0.004718, 'loss_consonant': 0.002935, 'loss_word': 0.010285}
   18 | 0.000205 | 160640/160678 | 10.5958 | 5.3964 | 14.52 |
val: {'recall': 0.997309, 'recall_grapheme': 0.99578, 'recall_vowel': 0.998788, 'recall_consonant': 0.998885, 'recall_word': 0.996592, 'acc_grapheme': 0.996713, 'acc_vowel': 0.998929, 'acc_consonant': 0.999029, 'acc_word': 0.996564, 'loss_grapheme': 0.014628, 'loss_vowel': 0.007128, 'loss_consonant': 0.004362, 'loss_word': 0.014304}
   19 | 0.000196 | 160640/160678 | 0.7100 | 5.2141 | 14.53 ||
val: {'recall': 0.997397, 'recall_grapheme': 0.996567, 'recall_vowel': 0.99859, 'recall_consonant': 0.997864, 'recall_word': 0.996636, 'acc_grapheme': 0.996664, 'acc_vowel': 0.99868, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997179, 'recall_grapheme': 0.996089, 'recall_vowel': 0.998756, 'recall_consonant': 0.997782, 'recall_word': 0.996618, 'acc_grapheme': 0.996614, 'acc_vowel': 0.99883, 'acc_consonant': 0.99873, 'acc_word': 0.996514, 'loss_grapheme': 0.017554, 'loss_vowel': 0.007191, 'loss_consonant': 0.00592, 'loss_word': 0.017446}
SWA>>>:


100%|██████████| 1255/1255 [07:29<00:00,  2.79it/s]



val: {'recall': 0.998662, 'recall_grapheme': 0.998145, 'recall_vowel': 0.999071, 'recall_consonant': 0.999286, 'recall_word': 0.998094, 'acc_grapheme': 0.998157, 'acc_vowel': 0.999253, 'acc_consonant': 0.999427, 'acc_word': 0.998083, 'loss_grapheme': 0.009256, 'loss_vowel': 0.004733, 'loss_consonant': 0.002929, 'loss_word': 0.010298}
   21 | 0.000178 | 160640/160678 | 4.3599 | 5.4885 | 14.47 ||
val: {'recall': 0.997731, 'recall_grapheme': 0.996469, 'recall_vowel': 0.99887, 'recall_consonant': 0.999117, 'recall_word': 0.997149, 'acc_grapheme': 0.997012, 'acc_vowel': 0.998855, 'acc_consonant': 0.999029, 'acc_word': 0.997087, 'loss_grapheme': 0.015353, 'loss_vowel': 0.006978, 'loss_consonant': 0.004377, 'loss_word': 0.014902}
   22 | 0.000169 | 160640/160678 | 0.4126 | 5.4459 | 14.48 ||
val: {'recall': 0.997691, 'recall_grapheme': 0.996581, 'recall_vowel': 0.998571, 'recall_consonant': 0.999032, 'recall_word': 0.996705, 'acc_grapheme': 0.996788, 'acc_vowel': 0.998755, 'acc_consonant': 0.

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997874, 'recall_grapheme': 0.996941, 'recall_vowel': 0.998916, 'recall_consonant': 0.998698, 'recall_word': 0.996945, 'acc_grapheme': 0.996937, 'acc_vowel': 0.998954, 'acc_consonant': 0.999079, 'acc_word': 0.996913, 'loss_grapheme': 0.015842, 'loss_vowel': 0.006548, 'loss_consonant': 0.004328, 'loss_word': 0.014929}
SWA>>>:


100%|██████████| 1255/1255 [07:26<00:00,  2.82it/s]



val: {'recall': 0.998685, 'recall_grapheme': 0.998191, 'recall_vowel': 0.999083, 'recall_consonant': 0.999275, 'recall_word': 0.998124, 'acc_grapheme': 0.998207, 'acc_vowel': 0.999278, 'acc_consonant': 0.999378, 'acc_word': 0.998108, 'loss_grapheme': 0.009227, 'loss_vowel': 0.004701, 'loss_consonant': 0.002924, 'loss_word': 0.010262}
   24 | 0.000150 | 160640/160678 | 10.3455 | 5.2598 | 14.54 |
val: {'recall': 0.997841, 'recall_grapheme': 0.997057, 'recall_vowel': 0.998619, 'recall_consonant': 0.998631, 'recall_word': 0.997266, 'acc_grapheme': 0.997087, 'acc_vowel': 0.998805, 'acc_consonant': 0.999104, 'acc_word': 0.997186, 'loss_grapheme': 0.01513, 'loss_vowel': 0.006716, 'loss_consonant': 0.004704, 'loss_word': 0.015115}
   25 | 0.000141 | 160640/160678 | 5.6906 | 5.0647 | 14.56 ||
val: {'recall': 0.997627, 'recall_grapheme': 0.996484, 'recall_vowel': 0.998566, 'recall_consonant': 0.998975, 'recall_word': 0.996856, 'acc_grapheme': 0.996788, 'acc_vowel': 0.99878, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997654, 'recall_grapheme': 0.996522, 'recall_vowel': 0.998607, 'recall_consonant': 0.998964, 'recall_word': 0.996799, 'acc_grapheme': 0.996813, 'acc_vowel': 0.998805, 'acc_consonant': 0.999079, 'acc_word': 0.996738, 'loss_grapheme': 0.016087, 'loss_vowel': 0.006706, 'loss_consonant': 0.004175, 'loss_word': 0.015579}
SWA>>>:


100%|██████████| 1255/1255 [07:35<00:00,  2.84it/s]



val: {'recall': 0.998677, 'recall_grapheme': 0.998172, 'recall_vowel': 0.999083, 'recall_consonant': 0.99928, 'recall_word': 0.998098, 'acc_grapheme': 0.998207, 'acc_vowel': 0.999278, 'acc_consonant': 0.999402, 'acc_word': 0.998083, 'loss_grapheme': 0.009246, 'loss_vowel': 0.004707, 'loss_consonant': 0.002913, 'loss_word': 0.010292}
   27 | 0.000122 | 160640/160678 | 0.2263 | 5.2864 | 14.51 ||
val: {'recall': 0.997813, 'recall_grapheme': 0.996634, 'recall_vowel': 0.998887, 'recall_consonant': 0.999096, 'recall_word': 0.997282, 'acc_grapheme': 0.997261, 'acc_vowel': 0.999004, 'acc_consonant': 0.999178, 'acc_word': 0.997236, 'loss_grapheme': 0.013065, 'loss_vowel': 0.005998, 'loss_consonant': 0.004054, 'loss_word': 0.013361}
   28 | 0.000113 | 160640/160678 | 0.0326 | 5.3469 | 14.44 ||
val: {'recall': 0.998143, 'recall_grapheme': 0.997263, 'recall_vowel': 0.998833, 'recall_consonant': 0.999215, 'recall_word': 0.997316, 'acc_grapheme': 0.997386, 'acc_vowel': 0.99888, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997805, 'recall_grapheme': 0.996926, 'recall_vowel': 0.998699, 'recall_consonant': 0.998671, 'recall_word': 0.997108, 'acc_grapheme': 0.997087, 'acc_vowel': 0.998855, 'acc_consonant': 0.998954, 'acc_word': 0.997087, 'loss_grapheme': 0.014715, 'loss_vowel': 0.006973, 'loss_consonant': 0.004651, 'loss_word': 0.014557}
SWA>>>:


100%|██████████| 1255/1255 [07:31<00:00,  2.76it/s]



val: {'recall': 0.998689, 'recall_grapheme': 0.998205, 'recall_vowel': 0.999071, 'recall_consonant': 0.999275, 'recall_word': 0.998098, 'acc_grapheme': 0.998257, 'acc_vowel': 0.999253, 'acc_consonant': 0.999378, 'acc_word': 0.998083, 'loss_grapheme': 0.009214, 'loss_vowel': 0.004671, 'loss_consonant': 0.002913, 'loss_word': 0.01023}
   30 | 0.000095 | 160640/160678 | 0.4890 | 5.0663 | 14.52 ||
val: {'recall': 0.997606, 'recall_grapheme': 0.99674, 'recall_vowel': 0.998862, 'recall_consonant': 0.998084, 'recall_word': 0.997176, 'acc_grapheme': 0.996987, 'acc_vowel': 0.998929, 'acc_consonant': 0.999203, 'acc_word': 0.997137, 'loss_grapheme': 0.01409, 'loss_vowel': 0.00592, 'loss_consonant': 0.004008, 'loss_word': 0.014213}
   31 | 0.000086 | 160640/160678 | 4.4006 | 5.0510 | 14.57 ||
val: {'recall': 0.997843, 'recall_grapheme': 0.996659, 'recall_vowel': 0.998905, 'recall_consonant': 0.999148, 'recall_word': 0.997106, 'acc_grapheme': 0.997186, 'acc_vowel': 0.999004, 'acc_consonant': 0.999

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998071, 'recall_grapheme': 0.997151, 'recall_vowel': 0.998729, 'recall_consonant': 0.999255, 'recall_word': 0.997553, 'acc_grapheme': 0.99761, 'acc_vowel': 0.999004, 'acc_consonant': 0.999278, 'acc_word': 0.99751, 'loss_grapheme': 0.012577, 'loss_vowel': 0.005745, 'loss_consonant': 0.003198, 'loss_word': 0.013072}
SWA>>>:


100%|██████████| 1255/1255 [07:30<00:00,  2.78it/s]



val: {'recall': 0.998665, 'recall_grapheme': 0.998165, 'recall_vowel': 0.999055, 'recall_consonant': 0.999275, 'recall_word': 0.998098, 'acc_grapheme': 0.998257, 'acc_vowel': 0.999228, 'acc_consonant': 0.999378, 'acc_word': 0.998083, 'loss_grapheme': 0.009232, 'loss_vowel': 0.004671, 'loss_consonant': 0.002901, 'loss_word': 0.010244}
   33 | 0.000070 | 160640/160678 | 10.0970 | 5.3108 | 14.49 |
val: {'recall': 0.998132, 'recall_grapheme': 0.997275, 'recall_vowel': 0.99888, 'recall_consonant': 0.999098, 'recall_word': 0.997563, 'acc_grapheme': 0.997684, 'acc_vowel': 0.999153, 'acc_consonant': 0.999253, 'acc_word': 0.99751, 'loss_grapheme': 0.012275, 'loss_vowel': 0.005423, 'loss_consonant': 0.003501, 'loss_word': 0.012497}
   34 | 0.000062 | 160640/160678 | 3.5376 | 5.3721 | 14.50 ||
val: {'recall': 0.997322, 'recall_grapheme': 0.996334, 'recall_vowel': 0.998822, 'recall_consonant': 0.997796, 'recall_word': 0.997072, 'acc_grapheme': 0.996913, 'acc_vowel': 0.998929, 'acc_consonant': 0.9

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 1255/1255 [07:29<00:00,  2.83it/s]



val: {'recall': 0.998642, 'recall_grapheme': 0.998135, 'recall_vowel': 0.999055, 'recall_consonant': 0.999244, 'recall_word': 0.998099, 'acc_grapheme': 0.998207, 'acc_vowel': 0.999228, 'acc_consonant': 0.999353, 'acc_word': 0.998083, 'loss_grapheme': 0.009189, 'loss_vowel': 0.004649, 'loss_consonant': 0.002898, 'loss_word': 0.010201}
   39 | 0.000031 | 086784/160678 | 6.6328 | 5.1872 | 7.83 ||

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: False

val: {'recall': 0.997777, 'recall_grapheme': 0.997203, 'recall_vowel': 0.9987, 'recall_consonant': 0.998002, 'recall_word': 0.997111, 'acc_grapheme': 0.997311, 'acc_vowel': 0.99888, 'acc_consonant': 0.999104, 'acc_word': 0.997037, 'loss_grapheme': 0.014067, 'loss_vowel': 0.006681, 'loss_consonant': 0.004011, 'loss_word': 0.014713}
CYCLE: 1
    8 | 0.000277 | 160640/160678 | 17.6224 | 5.4163 | 14.52 |
val: {'recall': 0.997021, 'recall_grapheme': 0.995488, 'recall_vowel': 0.998185, 'recall_consonant': 0.998923, 'recall_word': 0.995881, 'acc_grapheme': 0.996091, 'acc_vowel': 0.998531, 'acc_consonant': 0.998805, 'acc_word': 0.995817, 'loss_grapheme': 0.018216, 'loss_vowel': 0.008646, 'loss_consonant': 0.005926, 'loss_word': 0.018805}
    9 | 0.000271 | 

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.996452, 'recall_grapheme': 0.995173, 'recall_vowel': 0.99805, 'recall_consonant': 0.997411, 'recall_word': 0.995446, 'acc_grapheme': 0.995692, 'acc_vowel': 0.998282, 'acc_consonant': 0.998357, 'acc_word': 0.995443, 'loss_grapheme': 0.020047, 'loss_vowel': 0.008777, 'loss_consonant': 0.007031, 'loss_word': 0.020246}
SWA>>>:


100%|██████████| 1255/1255 [07:22<00:00,  2.74it/s]



val: {'recall': 0.996948, 'recall_grapheme': 0.995878, 'recall_vowel': 0.998365, 'recall_consonant': 0.997671, 'recall_word': 0.996224, 'acc_grapheme': 0.996365, 'acc_vowel': 0.998606, 'acc_consonant': 0.99873, 'acc_word': 0.996166, 'loss_grapheme': 0.014388, 'loss_vowel': 0.006843, 'loss_consonant': 0.005331, 'loss_word': 0.015355}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   10 | 0.000266 | 160640/160678 | 0.7395 | 5.6629 | 14.55 ||
val: {'recall': 0.996777, 'recall_grapheme': 0.995017, 'recall_vowel': 0.998473, 'recall_consonant': 0.9986, 'recall_word': 0.995581, 'acc_grapheme': 0.995817, 'acc_vowel': 0.998456, 'acc_consonant': 0.998531, 'acc_word': 0.995543, 'loss_grapheme': 0.01995, 'loss_vowel': 0.008123, 'loss_consonant': 0.006372, 'loss_word': 0.018806}
   11 | 0.000259 | 160640/160678 | 3.3898 | 5.8648 | 14.48 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997052, 'recall_grapheme': 0.995819, 'recall_vowel': 0.998293, 'recall_consonant': 0.998278, 'recall_word': 0.996098, 'acc_grapheme': 0.996116, 'acc_vowel': 0.998481, 'acc_consonant': 0.998705, 'acc_word': 0.996066, 'loss_grapheme': 0.019854, 'loss_vowel': 0.009025, 'loss_consonant': 0.006829, 'loss_word': 0.017836}
SWA>>>:


100%|██████████| 1255/1255 [07:22<00:00,  2.88it/s]



val: {'recall': 0.997659, 'recall_grapheme': 0.9972, 'recall_vowel': 0.998749, 'recall_consonant': 0.997486, 'recall_word': 0.997172, 'acc_grapheme': 0.997361, 'acc_vowel': 0.99888, 'acc_consonant': 0.999079, 'acc_word': 0.997137, 'loss_grapheme': 0.010937, 'loss_vowel': 0.005405, 'loss_consonant': 0.003997, 'loss_word': 0.011897}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   12 | 0.000253 | 160640/160678 | 7.8424 | 5.6098 | 14.51 ||
val: {'recall': 0.99691, 'recall_grapheme': 0.995702, 'recall_vowel': 0.998172, 'recall_consonant': 0.998065, 'recall_word': 0.996142, 'acc_grapheme': 0.996141, 'acc_vowel': 0.998531, 'acc_consonant': 0.998855, 'acc_word': 0.996116, 'loss_grapheme': 0.01899, 'loss_vowel': 0.007375, 'loss_consonant': 0.005545, 'loss_word': 0.017601}
   13 | 0.000246 | 160640/160678 | 0.5989 | 5.5960 | 14.54 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99723, 'recall_grapheme': 0.995767, 'recall_vowel': 0.998427, 'recall_consonant': 0.99896, 'recall_word': 0.996256, 'acc_grapheme': 0.99624, 'acc_vowel': 0.998755, 'acc_consonant': 0.998805, 'acc_word': 0.996215, 'loss_grapheme': 0.01634, 'loss_vowel': 0.006459, 'loss_consonant': 0.005056, 'loss_word': 0.015936}
SWA>>>:


100%|██████████| 1255/1255 [07:23<00:00,  2.82it/s]



val: {'recall': 0.998279, 'recall_grapheme': 0.997482, 'recall_vowel': 0.99894, 'recall_consonant': 0.999211, 'recall_word': 0.997412, 'acc_grapheme': 0.997734, 'acc_vowel': 0.999054, 'acc_consonant': 0.999303, 'acc_word': 0.997336, 'loss_grapheme': 0.010136, 'loss_vowel': 0.005108, 'loss_consonant': 0.003642, 'loss_word': 0.010995}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   14 | 0.000238 | 160640/160678 | 0.9021 | 5.4465 | 14.59 ||
val: {'recall': 0.997424, 'recall_grapheme': 0.996629, 'recall_vowel': 0.998472, 'recall_consonant': 0.997967, 'recall_word': 0.996573, 'acc_grapheme': 0.996788, 'acc_vowel': 0.998755, 'acc_consonant': 0.998855, 'acc_word': 0.996539, 'loss_grapheme': 0.014499, 'loss_vowel': 0.006752, 'loss_consonant': 0.005164, 'loss_word': 0.015201}
   15 | 0.000230 | 160640/160678 | 0.2959 | 5.4533 | 14.58 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997111, 'recall_grapheme': 0.996052, 'recall_vowel': 0.998064, 'recall_consonant': 0.998275, 'recall_word': 0.99635, 'acc_grapheme': 0.996589, 'acc_vowel': 0.998506, 'acc_consonant': 0.99878, 'acc_word': 0.99629, 'loss_grapheme': 0.015419, 'loss_vowel': 0.007114, 'loss_consonant': 0.004932, 'loss_word': 0.015244}
SWA>>>:


100%|██████████| 1255/1255 [07:25<00:00,  2.88it/s]



val: {'recall': 0.998272, 'recall_grapheme': 0.997468, 'recall_vowel': 0.998916, 'recall_consonant': 0.999236, 'recall_word': 0.997627, 'acc_grapheme': 0.997834, 'acc_vowel': 0.999029, 'acc_consonant': 0.999303, 'acc_word': 0.99756, 'loss_grapheme': 0.00945, 'loss_vowel': 0.004925, 'loss_consonant': 0.003254, 'loss_word': 0.010409}
   16 | 0.000222 | 160640/160678 | 10.8112 | 5.4566 | 14.58 |
val: {'recall': 0.9971, 'recall_grapheme': 0.996088, 'recall_vowel': 0.998334, 'recall_consonant': 0.99789, 'recall_word': 0.996763, 'acc_grapheme': 0.996639, 'acc_vowel': 0.998606, 'acc_consonant': 0.998979, 'acc_word': 0.996738, 'loss_grapheme': 0.016413, 'loss_vowel': 0.007198, 'loss_consonant': 0.005118, 'loss_word': 0.015594}
   17 | 0.000214 | 160640/160678 | 1.0638 | 5.5931 | 14.57 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997485, 'recall_grapheme': 0.99627, 'recall_vowel': 0.998553, 'recall_consonant': 0.998849, 'recall_word': 0.996588, 'acc_grapheme': 0.996415, 'acc_vowel': 0.998655, 'acc_consonant': 0.998805, 'acc_word': 0.996514, 'loss_grapheme': 0.016593, 'loss_vowel': 0.006738, 'loss_consonant': 0.004625, 'loss_word': 0.016071}
SWA>>>:


100%|██████████| 1255/1255 [07:25<00:00,  2.79it/s]



val: {'recall': 0.998293, 'recall_grapheme': 0.997527, 'recall_vowel': 0.998817, 'recall_consonant': 0.9993, 'recall_word': 0.997691, 'acc_grapheme': 0.997784, 'acc_vowel': 0.999004, 'acc_consonant': 0.999353, 'acc_word': 0.997635, 'loss_grapheme': 0.009491, 'loss_vowel': 0.004963, 'loss_consonant': 0.003214, 'loss_word': 0.010376}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   18 | 0.000205 | 160640/160678 | 7.0467 | 5.5238 | 14.56 ||
val: {'recall': 0.997625, 'recall_grapheme': 0.996493, 'recall_vowel': 0.998441, 'recall_consonant': 0.999073, 'recall_word': 0.996895, 'acc_grapheme': 0.996813, 'acc_vowel': 0.998481, 'acc_consonant': 0.998755, 'acc_word': 0.996838, 'loss_grapheme': 0.016372, 'loss_vowel': 0.007979, 'loss_consonant': 0.005166, 'loss_word': 0.015654}
   19 | 0.000196 | 160640/160678 | 3.4533 | 5.4238 | 14.60 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99692, 'recall_grapheme': 0.995621, 'recall_vowel': 0.998479, 'recall_consonant': 0.997959, 'recall_word': 0.99645, 'acc_grapheme': 0.996664, 'acc_vowel': 0.99873, 'acc_consonant': 0.998929, 'acc_word': 0.996365, 'loss_grapheme': 0.016834, 'loss_vowel': 0.008012, 'loss_consonant': 0.005066, 'loss_word': 0.017097}
SWA>>>:


100%|██████████| 1255/1255 [07:27<00:00,  2.74it/s]



val: {'recall': 0.99816, 'recall_grapheme': 0.99789, 'recall_vowel': 0.998713, 'recall_consonant': 0.998148, 'recall_word': 0.997754, 'acc_grapheme': 0.997933, 'acc_vowel': 0.998954, 'acc_consonant': 0.999253, 'acc_word': 0.997684, 'loss_grapheme': 0.009345, 'loss_vowel': 0.004971, 'loss_consonant': 0.003197, 'loss_word': 0.01029}
   20 | 0.000187 | 160640/160678 | 0.1724 | 5.5307 | 14.58 ||
val: {'recall': 0.997614, 'recall_grapheme': 0.996261, 'recall_vowel': 0.998878, 'recall_consonant': 0.999056, 'recall_word': 0.99668, 'acc_grapheme': 0.996639, 'acc_vowel': 0.998855, 'acc_consonant': 0.998979, 'acc_word': 0.996688, 'loss_grapheme': 0.014644, 'loss_vowel': 0.006527, 'loss_consonant': 0.004221, 'loss_word': 0.014517}
   21 | 0.000178 | 160640/160678 | 5.8060 | 5.4238 | 14.58 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997168, 'recall_grapheme': 0.995851, 'recall_vowel': 0.998182, 'recall_consonant': 0.998788, 'recall_word': 0.996638, 'acc_grapheme': 0.996688, 'acc_vowel': 0.998581, 'acc_consonant': 0.998929, 'acc_word': 0.996614, 'loss_grapheme': 0.016731, 'loss_vowel': 0.007349, 'loss_consonant': 0.005529, 'loss_word': 0.015817}
SWA>>>:


100%|██████████| 1255/1255 [07:25<00:00,  2.85it/s]



val: {'recall': 0.998163, 'recall_grapheme': 0.997843, 'recall_vowel': 0.99882, 'recall_consonant': 0.998148, 'recall_word': 0.997786, 'acc_grapheme': 0.997933, 'acc_vowel': 0.999054, 'acc_consonant': 0.999253, 'acc_word': 0.997734, 'loss_grapheme': 0.009366, 'loss_vowel': 0.004833, 'loss_consonant': 0.003136, 'loss_word': 0.010248}
   22 | 0.000169 | 160640/160678 | 0.4853 | 5.6410 | 14.54 ||
val: {'recall': 0.997365, 'recall_grapheme': 0.996448, 'recall_vowel': 0.998528, 'recall_consonant': 0.998038, 'recall_word': 0.996706, 'acc_grapheme': 0.996838, 'acc_vowel': 0.99868, 'acc_consonant': 0.999029, 'acc_word': 0.996688, 'loss_grapheme': 0.014289, 'loss_vowel': 0.006897, 'loss_consonant': 0.004664, 'loss_word': 0.014415}
   23 | 0.000159 | 160640/160678 | 0.5007 | 5.3476 | 14.63 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997548, 'recall_grapheme': 0.996134, 'recall_vowel': 0.998889, 'recall_consonant': 0.999033, 'recall_word': 0.996602, 'acc_grapheme': 0.996713, 'acc_vowel': 0.99873, 'acc_consonant': 0.998929, 'acc_word': 0.996489, 'loss_grapheme': 0.013565, 'loss_vowel': 0.006413, 'loss_consonant': 0.004768, 'loss_word': 0.014008}
SWA>>>:


100%|██████████| 1255/1255 [07:26<00:00,  2.82it/s]



val: {'recall': 0.998111, 'recall_grapheme': 0.997599, 'recall_vowel': 0.998997, 'recall_consonant': 0.998249, 'recall_word': 0.997787, 'acc_grapheme': 0.997908, 'acc_vowel': 0.999129, 'acc_consonant': 0.999328, 'acc_word': 0.997734, 'loss_grapheme': 0.009398, 'loss_vowel': 0.004793, 'loss_consonant': 0.003064, 'loss_word': 0.010248}
   24 | 0.000150 | 160640/160678 | 4.1812 | 5.4920 | 14.53 ||
val: {'recall': 0.997489, 'recall_grapheme': 0.996161, 'recall_vowel': 0.998651, 'recall_consonant': 0.998982, 'recall_word': 0.996339, 'acc_grapheme': 0.996514, 'acc_vowel': 0.99878, 'acc_consonant': 0.998954, 'acc_word': 0.99629, 'loss_grapheme': 0.018486, 'loss_vowel': 0.007118, 'loss_consonant': 0.005611, 'loss_word': 0.018274}
   25 | 0.000141 | 160640/160678 | 0.3945 | 5.3263 | 14.55 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997532, 'recall_grapheme': 0.996112, 'recall_vowel': 0.998809, 'recall_consonant': 0.999096, 'recall_word': 0.996716, 'acc_grapheme': 0.996888, 'acc_vowel': 0.998805, 'acc_consonant': 0.999153, 'acc_word': 0.996664, 'loss_grapheme': 0.014281, 'loss_vowel': 0.007193, 'loss_consonant': 0.003904, 'loss_word': 0.014757}
SWA>>>:


100%|██████████| 1255/1255 [07:25<00:00,  2.74it/s]



val: {'recall': 0.998078, 'recall_grapheme': 0.997529, 'recall_vowel': 0.99904, 'recall_consonant': 0.998215, 'recall_word': 0.997784, 'acc_grapheme': 0.997834, 'acc_vowel': 0.999129, 'acc_consonant': 0.999303, 'acc_word': 0.997734, 'loss_grapheme': 0.009424, 'loss_vowel': 0.004763, 'loss_consonant': 0.003038, 'loss_word': 0.01031}
   26 | 0.000131 | 160640/160678 | 6.5092 | 5.3686 | 14.53 ||
val: {'recall': 0.998148, 'recall_grapheme': 0.997199, 'recall_vowel': 0.998979, 'recall_consonant': 0.999214, 'recall_word': 0.997097, 'acc_grapheme': 0.997161, 'acc_vowel': 0.999004, 'acc_consonant': 0.999303, 'acc_word': 0.997012, 'loss_grapheme': 0.013813, 'loss_vowel': 0.00617, 'loss_consonant': 0.003575, 'loss_word': 0.014154}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth
   27 | 0.000122 | 160640/160678 | 2.1280 | 5.3695 | 14.53 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997558, 'recall_grapheme': 0.996641, 'recall_vowel': 0.998874, 'recall_consonant': 0.998077, 'recall_word': 0.996967, 'acc_grapheme': 0.996913, 'acc_vowel': 0.999004, 'acc_consonant': 0.999029, 'acc_word': 0.996888, 'loss_grapheme': 0.015472, 'loss_vowel': 0.005969, 'loss_consonant': 0.004342, 'loss_word': 0.015308}
SWA>>>:


100%|██████████| 1255/1255 [07:26<00:00,  2.80it/s]



val: {'recall': 0.997982, 'recall_grapheme': 0.997361, 'recall_vowel': 0.998929, 'recall_consonant': 0.998277, 'recall_word': 0.997726, 'acc_grapheme': 0.997809, 'acc_vowel': 0.999129, 'acc_consonant': 0.999328, 'acc_word': 0.997659, 'loss_grapheme': 0.009388, 'loss_vowel': 0.00465, 'loss_consonant': 0.003021, 'loss_word': 0.010281}
   28 | 0.000113 | 160640/160678 | 6.4669 | 5.1745 | 14.57 ||
val: {'recall': 0.997363, 'recall_grapheme': 0.995762, 'recall_vowel': 0.998714, 'recall_consonant': 0.999213, 'recall_word': 0.996578, 'acc_grapheme': 0.996415, 'acc_vowel': 0.99888, 'acc_consonant': 0.999178, 'acc_word': 0.996564, 'loss_grapheme': 0.016631, 'loss_vowel': 0.006573, 'loss_consonant': 0.00435, 'loss_word': 0.016137}
   29 | 0.000104 | 160640/160678 | 3.5526 | 5.4778 | 14.53 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997784, 'recall_grapheme': 0.996369, 'recall_vowel': 0.998917, 'recall_consonant': 0.999481, 'recall_word': 0.997117, 'acc_grapheme': 0.997087, 'acc_vowel': 0.99888, 'acc_consonant': 0.999253, 'acc_word': 0.997087, 'loss_grapheme': 0.013385, 'loss_vowel': 0.00576, 'loss_consonant': 0.00373, 'loss_word': 0.013619}
SWA>>>:


100%|██████████| 1255/1255 [07:27<00:00,  2.78it/s]



val: {'recall': 0.997958, 'recall_grapheme': 0.997244, 'recall_vowel': 0.999031, 'recall_consonant': 0.998313, 'recall_word': 0.997818, 'acc_grapheme': 0.997809, 'acc_vowel': 0.999153, 'acc_consonant': 0.999378, 'acc_word': 0.997759, 'loss_grapheme': 0.009371, 'loss_vowel': 0.004609, 'loss_consonant': 0.002973, 'loss_word': 0.010263}
   30 | 0.000095 | 160640/160678 | 0.2567 | 5.2458 | 14.57 ||
val: {'recall': 0.997458, 'recall_grapheme': 0.996076, 'recall_vowel': 0.998504, 'recall_consonant': 0.999177, 'recall_word': 0.996873, 'acc_grapheme': 0.996863, 'acc_vowel': 0.998855, 'acc_consonant': 0.999153, 'acc_word': 0.996863, 'loss_grapheme': 0.01391, 'loss_vowel': 0.006248, 'loss_consonant': 0.00431, 'loss_word': 0.013823}
   31 | 0.000086 | 160640/160678 | 5.5823 | 5.3276 | 14.60 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997125, 'recall_grapheme': 0.995724, 'recall_vowel': 0.998909, 'recall_consonant': 0.998144, 'recall_word': 0.996994, 'acc_grapheme': 0.996937, 'acc_vowel': 0.999029, 'acc_consonant': 0.999104, 'acc_word': 0.996913, 'loss_grapheme': 0.013782, 'loss_vowel': 0.006047, 'loss_consonant': 0.004345, 'loss_word': 0.014376}
SWA>>>:


100%|██████████| 1255/1255 [07:26<00:00,  2.79it/s]



val: {'recall': 0.998041, 'recall_grapheme': 0.99742, 'recall_vowel': 0.998975, 'recall_consonant': 0.998349, 'recall_word': 0.997769, 'acc_grapheme': 0.997859, 'acc_vowel': 0.999153, 'acc_consonant': 0.999427, 'acc_word': 0.997709, 'loss_grapheme': 0.009282, 'loss_vowel': 0.004571, 'loss_consonant': 0.002991, 'loss_word': 0.010217}
   32 | 0.000078 | 160640/160678 | 0.5919 | 5.2410 | 14.64 ||
val: {'recall': 0.997941, 'recall_grapheme': 0.996732, 'recall_vowel': 0.999022, 'recall_consonant': 0.999277, 'recall_word': 0.997319, 'acc_grapheme': 0.997535, 'acc_vowel': 0.999054, 'acc_consonant': 0.999253, 'acc_word': 0.997236, 'loss_grapheme': 0.011815, 'loss_vowel': 0.005973, 'loss_consonant': 0.004206, 'loss_word': 0.01213}
   33 | 0.000070 | 160640/160678 | 10.1644 | 5.4185 | 14.60 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997816, 'recall_grapheme': 0.996792, 'recall_vowel': 0.998615, 'recall_consonant': 0.999067, 'recall_word': 0.996946, 'acc_grapheme': 0.997236, 'acc_vowel': 0.998855, 'acc_consonant': 0.999054, 'acc_word': 0.996913, 'loss_grapheme': 0.013915, 'loss_vowel': 0.006617, 'loss_consonant': 0.004566, 'loss_word': 0.014077}
SWA>>>:


100%|██████████| 1255/1255 [07:26<00:00,  2.79it/s]



val: {'recall': 0.998109, 'recall_grapheme': 0.997543, 'recall_vowel': 0.999002, 'recall_consonant': 0.998349, 'recall_word': 0.997843, 'acc_grapheme': 0.997884, 'acc_vowel': 0.999203, 'acc_consonant': 0.999427, 'acc_word': 0.997784, 'loss_grapheme': 0.009255, 'loss_vowel': 0.004585, 'loss_consonant': 0.002982, 'loss_word': 0.010173}
   34 | 0.000062 | 160640/160678 | 0.3335 | 5.1165 | 14.51 ||
val: {'recall': 0.99808, 'recall_grapheme': 0.997194, 'recall_vowel': 0.998808, 'recall_consonant': 0.999125, 'recall_word': 0.997242, 'acc_grapheme': 0.997435, 'acc_vowel': 0.999004, 'acc_consonant': 0.999104, 'acc_word': 0.997211, 'loss_grapheme': 0.012228, 'loss_vowel': 0.005703, 'loss_consonant': 0.004255, 'loss_word': 0.012804}
   35 | 0.000054 | 160640/160678 | 0.3949 | 5.3013 | 14.56 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998103, 'recall_grapheme': 0.99719, 'recall_vowel': 0.998927, 'recall_consonant': 0.999106, 'recall_word': 0.99711, 'acc_grapheme': 0.997161, 'acc_vowel': 0.998979, 'acc_consonant': 0.999129, 'acc_word': 0.997037, 'loss_grapheme': 0.013102, 'loss_vowel': 0.005594, 'loss_consonant': 0.004074, 'loss_word': 0.013366}
SWA>>>:


100%|██████████| 1255/1255 [07:25<00:00,  2.74it/s]



val: {'recall': 0.99812, 'recall_grapheme': 0.997522, 'recall_vowel': 0.999086, 'recall_consonant': 0.998349, 'recall_word': 0.997829, 'acc_grapheme': 0.997884, 'acc_vowel': 0.999228, 'acc_consonant': 0.999427, 'acc_word': 0.997759, 'loss_grapheme': 0.009242, 'loss_vowel': 0.004549, 'loss_consonant': 0.002976, 'loss_word': 0.01017}
   36 | 0.000047 | 160640/160678 | 0.2078 | 4.8793 | 14.68 ||
val: {'recall': 0.998102, 'recall_grapheme': 0.997193, 'recall_vowel': 0.998795, 'recall_consonant': 0.999224, 'recall_word': 0.997163, 'acc_grapheme': 0.997261, 'acc_vowel': 0.998954, 'acc_consonant': 0.999253, 'acc_word': 0.997112, 'loss_grapheme': 0.011946, 'loss_vowel': 0.005578, 'loss_consonant': 0.003887, 'loss_word': 0.012467}
   37 | 0.000041 | 160640/160678 | 3.3179 | 5.2388 | 14.56 ||
val: {'recall': 0.99818, 'recall_grapheme': 0.997343, 'recall_vowel': 0.998791, 'recall_consonant': 0.99924, 'recall_word': 0.997164, 'acc_grapheme': 0.997336, 'acc_vowel': 0.998904, 'acc_consonant': 0.999

  0%|          | 0/1255 [00:00<?, ?it/s]

###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth
SWA>>>:


100%|██████████| 1255/1255 [07:25<00:00,  2.87it/s]



val: {'recall': 0.99812, 'recall_grapheme': 0.997522, 'recall_vowel': 0.999086, 'recall_consonant': 0.998349, 'recall_word': 0.99789, 'acc_grapheme': 0.997884, 'acc_vowel': 0.999228, 'acc_consonant': 0.999427, 'acc_word': 0.997834, 'loss_grapheme': 0.009227, 'loss_vowel': 0.004542, 'loss_consonant': 0.002948, 'loss_word': 0.010151}
   38 | 0.000034 | 160640/160678 | 6.3619 | 5.0688 | 14.67 ||
val: {'recall': 0.997851, 'recall_grapheme': 0.996699, 'recall_vowel': 0.998863, 'recall_consonant': 0.999145, 'recall_word': 0.996965, 'acc_grapheme': 0.997112, 'acc_vowel': 0.999029, 'acc_consonant': 0.999203, 'acc_word': 0.996913, 'loss_grapheme': 0.013457, 'loss_vowel': 0.005713, 'loss_consonant': 0.003765, 'loss_word': 0.013643}
   39 | 0.000029 | 160640/160678 | 0.1548 | 4.9497 | 14.65 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997767, 'recall_grapheme': 0.996576, 'recall_vowel': 0.998777, 'recall_consonant': 0.999137, 'recall_word': 0.997026, 'acc_grapheme': 0.997161, 'acc_vowel': 0.999029, 'acc_consonant': 0.999278, 'acc_word': 0.996962, 'loss_grapheme': 0.013439, 'loss_vowel': 0.005828, 'loss_consonant': 0.003872, 'loss_word': 0.01343}
SWA>>>:


100%|██████████| 1255/1255 [07:28<00:00,  2.85it/s]



val: {'recall': 0.998414, 'recall_grapheme': 0.997569, 'recall_vowel': 0.999086, 'recall_consonant': 0.999431, 'recall_word': 0.99789, 'acc_grapheme': 0.997908, 'acc_vowel': 0.999228, 'acc_consonant': 0.999452, 'acc_word': 0.997834, 'loss_grapheme': 0.009208, 'loss_vowel': 0.004521, 'loss_consonant': 0.002917, 'loss_word': 0.010137}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   40 | 0.000023 | 160640/160678 | 0.1099 | 5.2530 | 14.59 ||
val: {'recall': 0.998044, 'recall_grapheme': 0.997001, 'recall_vowel': 0.998917, 'recall_consonant': 0.999258, 'recall_word': 0.997205, 'acc_grapheme': 0.997161, 'acc_vowel': 0.999079, 'acc_consonant': 0.999278, 'acc_word': 0.997137, 'loss_grapheme': 0.013107, 'loss_vowel': 0.00523, 'loss_consonant': 0.003554, 'loss_word': 0.013355}
   41 | 0.000019 | 160640/160678 | 6.3527 | 5.0418 | 14.64 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997956, 'recall_grapheme': 0.996988, 'recall_vowel': 0.998758, 'recall_consonant': 0.999092, 'recall_word': 0.997318, 'acc_grapheme': 0.997386, 'acc_vowel': 0.999004, 'acc_consonant': 0.999203, 'acc_word': 0.997261, 'loss_grapheme': 0.012335, 'loss_vowel': 0.005705, 'loss_consonant': 0.00362, 'loss_word': 0.01248}
SWA>>>:


100%|██████████| 1255/1255 [07:26<00:00,  2.87it/s]



val: {'recall': 0.998413, 'recall_grapheme': 0.997562, 'recall_vowel': 0.999098, 'recall_consonant': 0.999431, 'recall_word': 0.997919, 'acc_grapheme': 0.997884, 'acc_vowel': 0.999253, 'acc_consonant': 0.999452, 'acc_word': 0.997859, 'loss_grapheme': 0.009221, 'loss_vowel': 0.0045, 'loss_consonant': 0.002919, 'loss_word': 0.010149}
   42 | 0.000014 | 160640/160678 | 10.6267 | 5.1114 | 14.64 |
val: {'recall': 0.997724, 'recall_grapheme': 0.996545, 'recall_vowel': 0.998823, 'recall_consonant': 0.998982, 'recall_word': 0.997079, 'acc_grapheme': 0.997186, 'acc_vowel': 0.999079, 'acc_consonant': 0.999129, 'acc_word': 0.997012, 'loss_grapheme': 0.013765, 'loss_vowel': 0.00582, 'loss_consonant': 0.004092, 'loss_word': 0.013788}
   43 | 0.000011 | 160640/160678 | 1.4242 | 5.1521 | 14.61 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998102, 'recall_grapheme': 0.997162, 'recall_vowel': 0.998811, 'recall_consonant': 0.999274, 'recall_word': 0.997238, 'acc_grapheme': 0.997311, 'acc_vowel': 0.999029, 'acc_consonant': 0.999253, 'acc_word': 0.997186, 'loss_grapheme': 0.012514, 'loss_vowel': 0.005523, 'loss_consonant': 0.003409, 'loss_word': 0.012822}
SWA>>>:


100%|██████████| 1255/1255 [07:30<00:00,  2.78it/s]



val: {'recall': 0.998492, 'recall_grapheme': 0.997737, 'recall_vowel': 0.999091, 'recall_consonant': 0.999403, 'recall_word': 0.997962, 'acc_grapheme': 0.997933, 'acc_vowel': 0.999228, 'acc_consonant': 0.999452, 'acc_word': 0.997908, 'loss_grapheme': 0.009206, 'loss_vowel': 0.004505, 'loss_consonant': 0.002902, 'loss_word': 0.010147}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
   44 | 0.000007 | 160640/160678 | 3.1264 | 5.0192 | 14.64 ||
val: {'recall': 0.997855, 'recall_grapheme': 0.996737, 'recall_vowel': 0.998836, 'recall_consonant': 0.999111, 'recall_word': 0.997119, 'acc_grapheme': 0.996962, 'acc_vowel': 0.999079, 'acc_consonant': 0.999178, 'acc_word': 0.997087, 'loss_grapheme': 0.013595, 'loss_vowel': 0.00566, 'loss_consonant': 0.003851, 'loss_word': 0.013519}
   45 | 0.000005 | 160640/160678 | 5.7163 | 5.3286 | 14.60 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997352, 'recall_grapheme': 0.996286, 'recall_vowel': 0.998744, 'recall_consonant': 0.99809, 'recall_word': 0.996978, 'acc_grapheme': 0.996763, 'acc_vowel': 0.998954, 'acc_consonant': 0.999153, 'acc_word': 0.996913, 'loss_grapheme': 0.015288, 'loss_vowel': 0.006136, 'loss_consonant': 0.004212, 'loss_word': 0.015159}
SWA>>>:


100%|██████████| 1255/1255 [07:30<00:00,  2.85it/s]



val: {'recall': 0.998482, 'recall_grapheme': 0.997711, 'recall_vowel': 0.999103, 'recall_consonant': 0.999403, 'recall_word': 0.997983, 'acc_grapheme': 0.997908, 'acc_vowel': 0.999253, 'acc_consonant': 0.999452, 'acc_word': 0.997933, 'loss_grapheme': 0.009179, 'loss_vowel': 0.004486, 'loss_consonant': 0.002888, 'loss_word': 0.010123}
   46 | 0.000003 | 160640/160678 | 6.3207 | 5.0260 | 14.68 ||
val: {'recall': 0.997789, 'recall_grapheme': 0.996593, 'recall_vowel': 0.998826, 'recall_consonant': 0.999145, 'recall_word': 0.997141, 'acc_grapheme': 0.996937, 'acc_vowel': 0.999054, 'acc_consonant': 0.999178, 'acc_word': 0.997087, 'loss_grapheme': 0.014154, 'loss_vowel': 0.00571, 'loss_consonant': 0.003957, 'loss_word': 0.014027}
   47 | 0.000001 | 160640/160678 | 5.0727 | 5.3185 | 14.67 ||
val: {'recall': 0.998268, 'recall_grapheme': 0.997585, 'recall_vowel': 0.998839, 'recall_consonant': 0.999061, 'recall_word': 0.997331, 'acc_grapheme': 0.99746, 'acc_vowel': 0.999104, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]

###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth
SWA>>>:


100%|██████████| 1255/1255 [07:30<00:00,  2.77it/s]



val: {'recall': 0.998482, 'recall_grapheme': 0.997711, 'recall_vowel': 0.999103, 'recall_consonant': 0.999403, 'recall_word': 0.997959, 'acc_grapheme': 0.997908, 'acc_vowel': 0.999253, 'acc_consonant': 0.999452, 'acc_word': 0.997908, 'loss_grapheme': 0.009157, 'loss_vowel': 0.004456, 'loss_consonant': 0.002886, 'loss_word': 0.010102}
   48 | 0.000000 | 160640/160678 | 5.1759 | 5.0752 | 14.69 ||
val: {'recall': 0.99797, 'recall_grapheme': 0.996977, 'recall_vowel': 0.998855, 'recall_consonant': 0.999073, 'recall_word': 0.997216, 'acc_grapheme': 0.997186, 'acc_vowel': 0.999129, 'acc_consonant': 0.999278, 'acc_word': 0.997161, 'loss_grapheme': 0.012723, 'loss_vowel': 0.005318, 'loss_consonant': 0.00351, 'loss_word': 0.01265}
   49 | 0.000000 | 160640/160678 | 10.3980 | 5.2344 | 14.67 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998145, 'recall_grapheme': 0.997263, 'recall_vowel': 0.998961, 'recall_consonant': 0.999095, 'recall_word': 0.99734, 'acc_grapheme': 0.99746, 'acc_vowel': 0.999153, 'acc_consonant': 0.999253, 'acc_word': 0.997286, 'loss_grapheme': 0.011741, 'loss_vowel': 0.005311, 'loss_consonant': 0.003502, 'loss_word': 0.011915}
SWA>>>:


100%|██████████| 1255/1255 [07:30<00:00,  2.70it/s]



val: {'recall': 0.998554, 'recall_grapheme': 0.997807, 'recall_vowel': 0.999103, 'recall_consonant': 0.999499, 'recall_word': 0.997983, 'acc_grapheme': 0.997933, 'acc_vowel': 0.999253, 'acc_consonant': 0.999477, 'acc_word': 0.997933, 'loss_grapheme': 0.009159, 'loss_vowel': 0.004458, 'loss_consonant': 0.002894, 'loss_word': 0.01011}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa
CYCLE: 2
    8 | 0.000277 | 160640/160678 | 6.5008 | 5.4948 | 14.68 ||
val: {'recall': 0.996787, 'recall_grapheme': 0.996248, 'recall_vowel': 0.998255, 'recall_consonant': 0.996396, 'recall_word': 0.996018, 'acc_grapheme': 0.99624, 'acc_vowel': 0.998282, 'acc_consonant': 0.998481, 'acc_word': 0.995966, 'loss_grapheme': 0.018003, 'loss_vowel': 0.008212, 'loss_consonant': 0.005905, 'loss_word': 0.018469}
    9 | 0.000273 | 104064/160678 | 6.6273 | 5.6662 | 9.48 |||

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: False

val: {'recall': 0.996864, 'recall_grapheme': 0.995341, 'recall_vowel': 0.998716, 'recall_consonant': 0.998059, 'recall_word': 0.995527, 'acc_grapheme': 0.995643, 'acc_vowel': 0.998456, 'acc_consonant': 0.998531, 'acc_word': 0.995543, 'loss_grapheme': 0.021051, 'loss_vowel': 0.009045, 'loss_consonant': 0.00687, 'loss_word': 0.019851}
CYCLE: 1
    0 | 0.000399 | 160640/160678 | 0.5148 | 5.8375 | 13.83 ||
val: {'recall': 0.994827, 'recall_grapheme': 0.993436, 'recall_vowel': 0.997847, 'recall_consonant': 0.994591, 'recall_word': 0.99367, 'acc_grapheme': 0.993501, 'acc_vowel': 0.997834, 'acc_consonant': 0.997784, 'acc_word': 0.993576, 'loss_grapheme': 0.027582, 'loss_vowel': 0.012216, 'loss_consonant': 0.01028, 'loss_word': 0.026025}
    1 | 0.000398 | 

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



   20 | 0.000184 | 160640/160678 | 3.5539 | 5.8566 | 14.42 ||
val: {'recall': 0.996555, 'recall_grapheme': 0.994989, 'recall_vowel': 0.998411, 'recall_consonant': 0.99783, 'recall_word': 0.996139, 'acc_grapheme': 0.996439, 'acc_vowel': 0.998357, 'acc_consonant': 0.99883, 'acc_word': 0.996141, 'loss_grapheme': 0.016667, 'loss_vowel': 0.007622, 'loss_consonant': 0.005235, 'loss_word': 0.016536}
   21 | 0.000169 | 160640/160678 | 7.9928 | 5.4756 | 14.50 ||
val: {'recall': 0.996231, 'recall_grapheme': 0.994384, 'recall_vowel': 0.998098, 'recall_consonant': 0.998057, 'recall_word': 0.995292, 'acc_grapheme': 0.995518, 'acc_vowel': 0.998282, 'acc_consonant': 0.998631, 'acc_word': 0.995319, 'loss_grapheme': 0.019995, 'loss_vowel': 0.008366, 'loss_consonant': 0.005858, 'loss_word': 0.019362}
   22 | 0.000153 | 160640/160678 | 0.1657 | 5.4465 | 14.47 ||
val: {'recall': 0.996016, 'recall_grapheme': 0.994153, 'recall_vowel': 0.997745, 'recall_consonant': 0.998014, 'recall_word': 0.995117, 'acc_gra

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



   28 | 0.000070 | 160640/160678 | 7.1218 | 5.6596 | 14.44 ||
val: {'recall': 0.997226, 'recall_grapheme': 0.995506, 'recall_vowel': 0.998533, 'recall_consonant': 0.999359, 'recall_word': 0.996388, 'acc_grapheme': 0.996315, 'acc_vowel': 0.998456, 'acc_consonant': 0.999178, 'acc_word': 0.99639, 'loss_grapheme': 0.01523, 'loss_vowel': 0.007205, 'loss_consonant': 0.004184, 'loss_word': 0.015197}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth
   29 | 0.000059 | 160640/160678 | 0.9150 | 5.2730 | 14.51 ||
val: {'recall': 0.997157, 'recall_grapheme': 0.995557, 'recall_vowel': 0.998705, 'recall_consonant': 0.998809, 'recall_word': 0.99652, 'acc_grapheme': 0.996539, 'acc_vowel': 0.998581, 'acc_consonant': 0.999129, 'acc_word': 0.996539, 'loss_grapheme': 0.014689, 'loss_vowel': 0.007272, 'loss_consonant': 0.004466, 'loss_word': 0.014702}
   30 | 0.000048 | 160640/160678 | 11.7399 | 5.5561 | 14.44 |
val: {'recall': 0.996746, 'recall_grapheme': 0.995126, 'recall_vowel': 0

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



   13 | 0.000145 | 160640/160678 | 3.6052 | 5.3858 | 14.55 ||
val: {'recall': 0.9972, 'recall_grapheme': 0.996244, 'recall_vowel': 0.998612, 'recall_consonant': 0.9977, 'recall_word': 0.996528, 'acc_grapheme': 0.99639, 'acc_vowel': 0.998556, 'acc_consonant': 0.99868, 'acc_word': 0.996439, 'loss_grapheme': 0.01712, 'loss_vowel': 0.006885, 'loss_consonant': 0.005825, 'loss_word': 0.016743}
   14 | 0.000138 | 160640/160678 | 2.5435 | 5.4727 | 14.51 ||
val: {'recall': 0.997059, 'recall_grapheme': 0.995488, 'recall_vowel': 0.998863, 'recall_consonant': 0.998399, 'recall_word': 0.996368, 'acc_grapheme': 0.996464, 'acc_vowel': 0.99878, 'acc_consonant': 0.998855, 'acc_word': 0.99634, 'loss_grapheme': 0.017036, 'loss_vowel': 0.006625, 'loss_consonant': 0.005677, 'loss_word': 0.0171}
   15 | 0.000131 | 160640/160678 | 5.5252 | 5.3911 | 14.55 ||
val: {'recall': 0.997457, 'recall_grapheme': 0.996516, 'recall_vowel': 0.998866, 'recall_consonant': 0.997931, 'recall_word': 0.997129, 'acc_grapheme': 0

   33 | 0.000011 | 160640/160678 | 6.0712 | 5.0062 | 14.53 ||
val: {'recall': 0.997484, 'recall_grapheme': 0.996719, 'recall_vowel': 0.998857, 'recall_consonant': 0.997638, 'recall_word': 0.996982, 'acc_grapheme': 0.997037, 'acc_vowel': 0.999054, 'acc_consonant': 0.999228, 'acc_word': 0.996962, 'loss_grapheme': 0.014909, 'loss_vowel': 0.006764, 'loss_consonant': 0.004052, 'loss_word': 0.015049}
   34 | 0.000008 | 160640/160678 | 12.4042 | 5.1806 | 14.50 |
val: {'recall': 0.997746, 'recall_grapheme': 0.997055, 'recall_vowel': 0.99883, 'recall_consonant': 0.998047, 'recall_word': 0.997351, 'acc_grapheme': 0.99741, 'acc_vowel': 0.999004, 'acc_consonant': 0.999178, 'acc_word': 0.997311, 'loss_grapheme': 0.012888, 'loss_vowel': 0.006307, 'loss_consonant': 0.003611, 'loss_word': 0.013399}
   35 | 0.000005 | 160640/160678 | 12.4352 | 5.1254 | 14.57 |

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth_swa, exist: False

val: {'recall': 0.863007, 'recall_grapheme': 0.799473, 'recall_vowel': 0.910495, 'recall_consonant': 0.942587, 'recall_word': 0.792659, 'acc_grapheme': 0.579279, 'acc_vowel': 0.896942, 'acc_consonant': 0.816369, 'acc_word': 0.390718, 'loss_grapheme': 1.777621, 'loss_vowel': 0.442087, 'loss_consonant': 0.433245, 'loss_word': 2.889786}
CYCLE: 1


  'recall', 'true', average, warn_for)


    0 | 0.000292 | 160640/160678 | 9.0631 | 11.0421 | 13.91 ||
val: {'recall': 0.990122, 'recall_grapheme': 0.987137, 'recall_vowel': 0.994537, 'recall_consonant': 0.991677, 'recall_word': 0.986548, 'acc_grapheme': 0.987476, 'acc_vowel': 0.994921, 'acc_consonant': 0.995892, 'acc_word': 0.986355, 'loss_grapheme': 0.081153, 'loss_vowel': 0.043929, 'loss_consonant': 0.032938, 'loss_word': 0.071857}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth
    1 | 0.000543 | 160640/160678 | 11.2862 | 7.7721 | 14.02 |
val: {'recall': 0.99057, 'recall_grapheme': 0.985973, 'recall_vowel': 0.994439, 'recall_consonant': 0.995896, 'recall_word': 0.987708, 'acc_grapheme': 0.98765, 'acc_vowel': 0.994921, 'acc_consonant': 0.996116, 'acc_word': 0.987227, 'loss_grapheme': 0.050568, 'loss_vowel': 0.022158, 'loss_consonant': 0.018201, 'loss_word': 0.050209}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold2_380.pth
    2 | 0.000476 | 160640/160678 | 1.3762 | 7.5592 | 14.18 

    9 | 0.000171 | 160640/160678 | 6.6217 | 6.1082 | 14.56 ||
val: {'recall': 0.99469, 'recall_grapheme': 0.993247, 'recall_vowel': 0.997689, 'recall_consonant': 0.994578, 'recall_word': 0.993979, 'acc_grapheme': 0.993974, 'acc_vowel': 0.997958, 'acc_consonant': 0.997834, 'acc_word': 0.993974, 'loss_grapheme': 0.028306, 'loss_vowel': 0.012581, 'loss_consonant': 0.009692, 'loss_word': 0.026933}
   10 | 0.000165 | 160640/160678 | 13.6986 | 5.9318 | 14.58 |
val: {'recall': 0.994702, 'recall_grapheme': 0.992307, 'recall_vowel': 0.997553, 'recall_consonant': 0.996642, 'recall_word': 0.993668, 'acc_grapheme': 0.993576, 'acc_vowel': 0.997784, 'acc_consonant': 0.997859, 'acc_word': 0.993651, 'loss_grapheme': 0.029794, 'loss_vowel': 0.013136, 'loss_consonant': 0.01068, 'loss_word': 0.02701}
   11 | 0.000159 | 160640/160678 | 0.8778 | 6.0795 | 14.56 ||
val: {'recall': 0.995492, 'recall_grapheme': 0.99389, 'recall_vowel': 0.998107, 'recall_consonant': 0.996083, 'recall_word': 0.994334, 'acc_graph

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa, exist: False

val: {'recall': 0.796988, 'recall_grapheme': 0.763845, 'recall_vowel': 0.863414, 'recall_consonant': 0.796848, 'recall_word': 0.762264, 'acc_grapheme': 0.493819, 'acc_vowel': 0.788904, 'acc_consonant': 0.767645, 'acc_word': 0.316245, 'loss_grapheme': 1.992506, 'loss_vowel': 0.626105, 'loss_consonant': 0.522644, 'loss_word': 3.290409}
CYCLE: 1


  'recall', 'true', average, warn_for)


    0 | 0.000292 | 160640/160716 | 6.7174 | 10.7151 | 13.99 ||
val: {'recall': 0.992886, 'recall_grapheme': 0.989107, 'recall_vowel': 0.996141, 'recall_consonant': 0.997187, 'recall_word': 0.9882, 'acc_grapheme': 0.989607, 'acc_vowel': 0.996087, 'acc_consonant': 0.996087, 'acc_word': 0.988212, 'loss_grapheme': 0.129831, 'loss_vowel': 0.068911, 'loss_consonant': 0.05198, 'loss_word': 0.144907}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth
    1 | 0.000543 | 160640/160716 | 7.1504 | 7.3232 | 14.15 ||
val: {'recall': 0.992768, 'recall_grapheme': 0.989784, 'recall_vowel': 0.995191, 'recall_consonant': 0.996311, 'recall_word': 0.989335, 'acc_grapheme': 0.99018, 'acc_vowel': 0.995364, 'acc_consonant': 0.995464, 'acc_word': 0.988884, 'loss_grapheme': 0.069323, 'loss_vowel': 0.043396, 'loss_consonant': 0.0354, 'loss_word': 0.072684}
    2 | 0.000476 | 160640/160716 | 16.1799 | 6.6101 | 14.24 |
val: {'recall': 0.994403, 'recall_grapheme': 0.992025, 'recall_vowel': 0.9

KeyboardInterrupt: 

In [44]:
time.time()

1582104595.1300395

In [None]:
#save_model(model, model_file)

In [None]:
del model