In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet
from cvcore.data.auto_augment import RandAugment
from PIL import Image
from utils import bn_update, moving_average, copy_model


In [2]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [3]:
#!ls /home/chec/data/bengali

In [4]:
DATA_DIR = '/home/chec/data/bengali'

In [5]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [7]:
HEIGHT = 137
WIDTH = 236

In [8]:
#import albumentations as albu
def get_train_augs():
    return RandAugment(n=2, m=27)

In [9]:
#plt.imshow(x)

In [10]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        orig_img = img.copy()
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            #img = augs(image=img)['image']
            img = np.asarray(augs(Image.fromarray(img)))
        
        img = np.expand_dims(img, axis=-1)
        orig_img = np.expand_dims(orig_img, axis=-1)
        
        #print('###', img.shape)
        #img = np.concatenate([img, img, img], 2)
        #print('>>>', img.shape)
        
        # taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
        #MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
        #STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]
        
        img = transforms.functional.to_tensor(img)
        orig_img = transforms.functional.to_tensor(orig_img)
        
        #img = transforms.functional.normalize(img, mean=MEAN, std=STD)
        
        if self.test_mode:
            return img
        elif self.train_mode:
            return img, orig_img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
                    
    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')

    train_df = shuffle(train_df, random_state=1234)

    grapheme_words = np.unique(train_df.grapheme.values)
    grapheme_words_dict = {grapheme: i for i, grapheme in enumerate(grapheme_words)}
    train_df['word_label'] = train_df['grapheme'].map(lambda x: grapheme_words_dict[x])

    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [11]:
#train_loader, val_loader = get_train_val_loaders()

In [12]:
#for x in train_loader:
#    print(x)
#    break

# model

In [13]:
#import pretrainedmodels

In [14]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [15]:
from argparse import Namespace
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d

MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

class BengaliNet(nn.Module):
    """
    EfficientNet B0-B8.
    Args:
        cfg (CfgNode): configs
    """
    def __init__(self, cfg):
        super(BengaliNet, self).__init__()
        model_name = cfg.MODEL_NAME
        pretrained = cfg.PRETRAINED
        input_channels = cfg.IN_CHANNELS
        pool_type = cfg.POOL_TYPE
        drop_connect_rate = cfg.DROP_CONNECT
        self.drop_rate = cfg.DROPOUT
        cls_head = cfg.CLS_HEAD
        num_total_classes = cfg.NUM_GRAPHEME_CLASSES + cfg.NUM_VOWEL_CLASSES + cfg.NUM_CONSONANT_CLASSES \
            + cfg.NUM_WORD_CLASSES

        backbone = timm.create_model(
            model_name=model_name,
            pretrained=pretrained,
            in_chans=input_channels,
            drop_connect_rate=drop_connect_rate,
        )
        self.conv_stem = backbone.conv_stem
        self.bn1 = backbone.bn1
        self.act1 = backbone.act1
        ### Original blocks ###
        for i in range(len((backbone.blocks))):
            setattr(self, "block{}".format(str(i)), backbone.blocks[i])
        self.conv_head = backbone.conv_head
        self.bn2 = backbone.bn2
        self.act2 = backbone.act2
        self.aux_block5 = backbone.blocks[5]
        self.aux_num_features = self.block5[-1].bn3.num_features
        self.aux_head4 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn4 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act4 = Swish()
        self.aux_head5 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn5 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act5 = Swish()
        self.global_pool = SelectAdaptivePool2d(pool_type=pool_type)
        self.num_features = backbone.num_features * self.global_pool.feat_mult()
        assert cls_head == 'linear'
        if cls_head == "linear":
            ### Baseline head ###
            self.fc = nn.Linear(self.num_features, num_total_classes)            
            self.aux_fc1 = nn.Linear(self.aux_num_features*4, num_total_classes)
            self.aux_fc2 = nn.Linear(self.aux_num_features*4, num_total_classes)
            
            for fc in [self.fc, self.aux_fc1, self.aux_fc2]:
                nn.init.zeros_(fc.bias.data)
        elif cls_head == "norm_softmax":
            ### NormSoftmax ###
            self.grapheme_fc = NormSoftmax(self.num_features, num_grapheme_classes)
            self.consonant_fc = NormSoftmax(self.num_features, num_consonant_classes)
            self.vowel_fc = NormSoftmax(self.num_features, num_vowel_classes)
        # Replace with Mish activation
        if cfg.MODEL_ACTIVATION == "mish":
            convert_swish_to_mish(self)
        del backbone

    def _features(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.block0(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x); b4 = x
        x = self.block5(x); b4 = self.aux_block5(b4); b5 = x
        x = self.block6(x)
        x = self.conv_head(x); b4 = self.aux_head4(b4); b5 = self.aux_head5(b5)
        x = self.bn2(x); b4 = self.bn4(b4); b5 = self.bn5(b5)
        x = self.act2(x); b4 = self.act4(b4); b5 = self.act5(b5)
        return b4, b5, x

    def forward(self, x):
        x = F.interpolate(x, size=(380, 380), mode='bilinear', align_corners=False)
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)

        # _, _, x = self._features(x)
        b4, b5, x = self._features(x)
        x = self.global_pool(x); b4 = self.global_pool(b4); b5 = self.global_pool(b5)
        x = torch.flatten(x, 1); b4 = torch.flatten(b4, 1); b5 = torch.flatten(b5, 1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        logits = self.fc(x)
        
        aux_logits1 = self.aux_fc1(b4)
        aux_logits2 = self.aux_fc2(b5)
        
        return logits, aux_logits1, aux_logits2

In [16]:
MODEL_DIR = './model4-ckps'
def create_model(cfg):
    model = BengaliNet(cfg)
    model_file = os.path.join(MODEL_DIR, cfg.MODEL_NAME, cfg.CKP_NAME)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [17]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [18]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [19]:
round(1/9, 6)

0.111111

In [20]:
import numpy as np
import sklearn.metrics
import torch


def calc_metrics(preds0, preds1, preds2, preds3, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2) == len(preds3)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    recall_word = sklearn.metrics.recall_score(preds3, y[:, 3], average='macro')
    
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    metrics['recall_word'] = round(recall_word, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    metrics['acc_word'] = round((preds3 == y[:, 3]).sum() / len(y), 6)    
    
    return metrics

In [21]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    loss3 = F.cross_entropy(outputs[3], y_true[:, 3], reduction='mean')
    
    return loss0 + loss1 + loss2 + loss3 #, loss0.item(), loss1.item(), loss2.item()

In [22]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2, loss3 = 0., 0., 0., 0.
    preds0, preds1, preds2, preds3 = [], [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs, _, _ = model(x)
            outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            preds3.append(torch.max(outputs[3], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            loss3 += F.cross_entropy(outputs[3], y[:, 3], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    preds3 = torch.cat(preds3, 0).cpu().numpy()
    
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, preds3, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    metrics['loss_word'] = round(loss3 / val_loader.num, 6)
    
    return metrics
            

In [23]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [24]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [25]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [26]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [27]:
def rand_bbox_new(size, lam):
    H = size[2]
    W = size[3]

    x_margin_rate = 0.2

    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * (1-x_margin_rate*2) * cut_rat)
    cut_h = np.int(H * cut_rat)
    
    min_x_center = np.int(W * x_margin_rate + cut_w / 2)
    max_x_center = np.int(W * (1-x_margin_rate) - cut_w / 2)
    #print(min_x_center, max_x_center, lam, cut_w)
    min_y_center = cut_h // 2
    max_y_center = H - cut_h // 2
    if max_y_center == min_y_center:
        max_y_center += 1

    # uniform
    cx = np.random.randint(min_x_center, max_x_center)
    cy = np.random.randint(min_y_center, max_y_center)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    
    #print(bbx1, bbx2, bby1, bby2)

    return bbx1, bby1, bbx2, bby2

In [28]:
np.random.random()

0.029572411820954048

In [29]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam
from gridmask import GridMask

In [30]:
from cvcore.solver import WarmupCyclicalLR
def make_optimizer(model, base_lr=4e-4, weight_decay=0., weight_decay_bias=0., epsilon=1e-3):
    """
    Create optimizer with per-layer learning rate and weight decay.
    """
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = base_lr
        params += [{"params": [value], "lr": lr, "weight_decay": weight_decay_bias if 'bias' in key else weight_decay}]
    
    optimizer = torch.optim.AdamW(params, lr, eps=epsilon)
    return optimizer

In [31]:
import time
def train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid):
    train_loss = 0
    bg = time.time()

    for batch_idx, (img, orig_img, targets) in enumerate(train_loader):
        img, orig_img, targets  = img.cuda(), orig_img.cuda(), targets.cuda()
        batch_size = img.size(0)
        r = np.random.rand()

        if r < 0.3:
            # generate mixed sample
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(img.size()[0]).cuda()
            target_a = targets
            target_b = targets[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
            #img[:, :, bby1:bby2, bbx1:bbx2] = img[rand_index, :, bby1:bby2, bbx1:bbx2] #for new cutmix
            img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
            
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
            # compute output
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            loss_aux1 = criterion(outputs_aux1, target_a) * lam + criterion(outputs_aux1, target_b) * (1. - lam)
            loss_aux2 = criterion(outputs_aux2, target_a) * lam + criterion(outputs_aux2, target_b) * (1. - lam)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        elif r > 0.7:
            img = grid(img)
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, targets)
            loss_aux1 = criterion(outputs_aux1, targets)
            loss_aux2 = criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        else:
            orig_img, targets = mixup(orig_img, targets)
            outputs, outputs_aux1, outputs_aux2 = model(orig_img)
            loss_primary = mixup_criterion(outputs, targets)
            loss_aux1 = mixup_criterion(outputs_aux1, targets)
            loss_aux2 = mixup_criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
            #loss = criterion(outputs, targets)

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        #loss.backward()
        lr_scheduler(optimizer, batch_idx, epoch)
        optimizer.step()            
        
        current_lr = get_lrs(optimizer)

        train_loss += loss.item()
        print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} | {:.2f} |'.format(
            epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
            loss.item(), train_loss/(batch_idx+1), (time.time() - bg) / 60), end='')


In [32]:
import copy
best_metrics = 0.
best_metrics_swa = 0.


def validate_and_save(model, model_file, val_loader, save=False):
    global best_metrics
    best_key = 'recall'
    val_metrics = validate(model, val_loader)
    print('\nval:', val_metrics)
    
    if val_metrics[best_key] > best_metrics:
        best_metrics = val_metrics[best_key]
        if save:
            save_model(model, model_file)
            print('###>>>>> saved', model_file)
    model.train()

def validate_and_save_swa(model, model_file, val_loader, save=False):
    global best_metrics_swa
    best_key = 'recall'
    val_metrics = validate(model, val_loader)
    print('\nval:', val_metrics)
    
    if val_metrics[best_key] > best_metrics_swa:
        best_metrics_swa = val_metrics[best_key]
        if save:
            save_model(model, model_file)
            print('###>>>>> saved', model_file)
    model.train()


def train(args):
    model, model_file = create_model(cfg)
    model = model.cuda()

    swa_cfg = copy.deepcopy(cfg)
    swa_cfg.CKP_NAME = cfg.CKP_NAME + '_swa'
    swa_model, swa_model_file = create_model(swa_cfg)
    swa_model = swa_model.cuda()

    optimizer = make_optimizer(model)
    lr_scheduler = WarmupCyclicalLR(
        "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)
    
    [model, swa_model], optimizer = amp.initialize([model, swa_model], optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        swa_model = nn.DataParallel(swa_model)
    
    validate_and_save(model, model_file, val_loader, save=False)

    swa_model_loaded = False
    if os.path.exists(swa_model_file):
        swa_model_loaded = True
        validate_and_save_swa(swa_model, swa_model_file, val_loader, save=False)
    
    for cycle in range(1, args.num_cycles+1):
        print('CYCLE:', cycle)
        grid = GridMask(64, 128, rotate=15, ratio=0.6, mode=1, prob=1.)

        for epoch in range(args.start_epoch, args.num_epochs):
            grid.set_prob(epoch, args.st_epochs)
            train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid)
            validate_and_save(model, model_file, val_loader, save=True)
            
            if (epoch+1) == args.swa_start and cycle == 1:
                if not swa_model_loaded:
                    copy_model(swa_model, model)
                swa_n = args.swa_n
            if (epoch+1) >= args.swa_start and (epoch+1) % args.swa_freq == 0:
                print('SWA>>>:')
                moving_average(swa_model, model, 1.0 / (swa_n + 1))
                swa_n += 1
                bn_update(train_loader, swa_model)
                validate_and_save_swa(swa_model, swa_model_file, val_loader, save=True)

        #args.base_lr = 4e-4
        #args.num_epochs = 60
        #args.warmup_epochs = 3

        # reset scheduler at each cycle
        lr_scheduler = WarmupCyclicalLR(
            "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)

In [33]:
cfg = Namespace()
cfg.MODEL_NAME = 'tf_efficientnet_b4'
cfg.PRETRAINED = True
cfg.IN_CHANNELS = 1
cfg.POOL_TYPE = 'avg'
cfg.CLS_HEAD = 'linear'
cfg.MODEL_ACTIVATION = 'swish'
cfg.DROP_CONNECT = 0.2
cfg.DROPOUT= 0.
cfg.NUM_WORD_CLASSES = 1295
cfg.NUM_GRAPHEME_CLASSES = 168
cfg.NUM_VOWEL_CLASSES = 11
cfg.NUM_CONSONANT_CLASSES = 7
cfg.CKP_NAME = 'model4_eb4_fold3_380.pth'

In [34]:
#model, model_file = create_model(cfg)
#model(torch.randn(2,1,137,236))[0].size()

In [35]:
args = Namespace()

args.base_lr = 3e-4
args.num_epochs = 60
args.start_epoch = 0
args.warmup_epochs = 10
args.num_cycles = 100
args.batch_size = 128
args.val_batch_size = 512
args.st_epochs = 5

args.swa_start = 2
args.swa_freq = 3
args.swa_n = 3

args.beta = 1.0
args.cutmix_prob = 0.5

In [36]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=3)

(200840, 6)
(200840, 32332)
(160716, 6) (40124, 6)


In [None]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa...

val: {'recall': 0.998467, 'recall_grapheme': 0.99772, 'recall_vowel': 0.998871, 'recall_consonant': 0.999558, 'recall_word': 0.99743, 'acc_grapheme': 0.997582, 'acc_vowel': 0.998704, 'acc_consonant': 0.999477, 'acc_word': 0.997458, 'loss_grapheme': 0.012791, 'loss_vowel': 0.006768, 'loss_consonant': 0.002529, 'loss_word': 0.01347}

val: {'recall': 0.998677, 'recall_grapheme': 0.998053, 'recall_vowel': 0.998936, 'recall_consonant': 0.999667, 'recall_word': 0.997745, 'acc_grapheme': 0.998056, 'acc_vowel': 0.998829, 'acc_consonant': 0.999477, 'acc_word': 0.997732, 'loss_grapheme': 0.011116, 'loss_vowel': 0.005994, 'loss_consonant': 0.002174, 'loss_word': 0.012167}
CYCLE: 1
    0 | 

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998139, 'recall_grapheme': 0.997313, 'recall_vowel': 0.998481, 'recall_consonant': 0.999447, 'recall_word': 0.997014, 'acc_grapheme': 0.997209, 'acc_vowel': 0.99853, 'acc_consonant': 0.999227, 'acc_word': 0.997009, 'loss_grapheme': 0.01459, 'loss_vowel': 0.006974, 'loss_consonant': 0.003406, 'loss_word': 0.014635}
SWA>>>:


100%|██████████| 1255/1255 [07:07<00:00,  2.90it/s]



val: {'recall': 0.998648, 'recall_grapheme': 0.998056, 'recall_vowel': 0.99884, 'recall_consonant': 0.999639, 'recall_word': 0.997759, 'acc_grapheme': 0.998056, 'acc_vowel': 0.998829, 'acc_consonant': 0.999477, 'acc_word': 0.997732, 'loss_grapheme': 0.011008, 'loss_vowel': 0.005916, 'loss_consonant': 0.00213, 'loss_word': 0.012015}
    3 | 0.000119 | 160640/160716 | 0.0217 | 5.2222 | 14.18 ||
val: {'recall': 0.998229, 'recall_grapheme': 0.997337, 'recall_vowel': 0.998754, 'recall_consonant': 0.999489, 'recall_word': 0.997086, 'acc_grapheme': 0.997358, 'acc_vowel': 0.998579, 'acc_consonant': 0.999302, 'acc_word': 0.997134, 'loss_grapheme': 0.01389, 'loss_vowel': 0.007072, 'loss_consonant': 0.003256, 'loss_word': 0.01489}
    4 | 0.000147 | 160640/160716 | 0.1496 | 5.0842 | 14.36 ||
val: {'recall': 0.997871, 'recall_grapheme': 0.996877, 'recall_vowel': 0.998279, 'recall_consonant': 0.99945, 'recall_word': 0.996458, 'acc_grapheme': 0.996685, 'acc_vowel': 0.99828, 'acc_consonant': 0.99912

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997897, 'recall_grapheme': 0.996845, 'recall_vowel': 0.998511, 'recall_consonant': 0.999387, 'recall_word': 0.996457, 'acc_grapheme': 0.996885, 'acc_vowel': 0.99848, 'acc_consonant': 0.999078, 'acc_word': 0.996436, 'loss_grapheme': 0.016478, 'loss_vowel': 0.008389, 'loss_consonant': 0.004269, 'loss_word': 0.018029}
SWA>>>:


100%|██████████| 1255/1255 [07:02<00:00,  2.92it/s]



val: {'recall': 0.998631, 'recall_grapheme': 0.998019, 'recall_vowel': 0.998826, 'recall_consonant': 0.999661, 'recall_word': 0.997683, 'acc_grapheme': 0.997956, 'acc_vowel': 0.998829, 'acc_consonant': 0.999452, 'acc_word': 0.997657, 'loss_grapheme': 0.011307, 'loss_vowel': 0.006126, 'loss_consonant': 0.00227, 'loss_word': 0.012407}
    6 | 0.000203 | 160640/160716 | 0.3573 | 5.2923 | 14.43 ||
val: {'recall': 0.997851, 'recall_grapheme': 0.997002, 'recall_vowel': 0.998393, 'recall_consonant': 0.999005, 'recall_word': 0.996364, 'acc_grapheme': 0.996735, 'acc_vowel': 0.99843, 'acc_consonant': 0.998829, 'acc_word': 0.996386, 'loss_grapheme': 0.016211, 'loss_vowel': 0.007998, 'loss_consonant': 0.004733, 'loss_word': 0.01706}
    7 | 0.000230 | 160640/160716 | 0.0862 | 5.2428 | 14.45 ||
val: {'recall': 0.99754, 'recall_grapheme': 0.996247, 'recall_vowel': 0.998315, 'recall_consonant': 0.99935, 'recall_word': 0.996234, 'acc_grapheme': 0.996685, 'acc_vowel': 0.99848, 'acc_consonant': 0.99892

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997588, 'recall_grapheme': 0.996698, 'recall_vowel': 0.997703, 'recall_consonant': 0.999253, 'recall_word': 0.995994, 'acc_grapheme': 0.996361, 'acc_vowel': 0.997882, 'acc_consonant': 0.998978, 'acc_word': 0.995963, 'loss_grapheme': 0.018018, 'loss_vowel': 0.008825, 'loss_consonant': 0.004723, 'loss_word': 0.019284}
SWA>>>:


100%|██████████| 1255/1255 [07:09<00:00,  2.83it/s]



val: {'recall': 0.998573, 'recall_grapheme': 0.997902, 'recall_vowel': 0.998823, 'recall_consonant': 0.999667, 'recall_word': 0.997751, 'acc_grapheme': 0.997832, 'acc_vowel': 0.998804, 'acc_consonant': 0.999477, 'acc_word': 0.997732, 'loss_grapheme': 0.011292, 'loss_vowel': 0.00607, 'loss_consonant': 0.002166, 'loss_word': 0.012427}
    9 | 0.000280 | 160640/160716 | 0.8170 | 5.4379 | 14.44 ||
val: {'recall': 0.997248, 'recall_grapheme': 0.996205, 'recall_vowel': 0.998304, 'recall_consonant': 0.998277, 'recall_word': 0.995014, 'acc_grapheme': 0.996012, 'acc_vowel': 0.998081, 'acc_consonant': 0.99828, 'acc_word': 0.994941, 'loss_grapheme': 0.019828, 'loss_vowel': 0.010113, 'loss_consonant': 0.007176, 'loss_word': 0.024107}
   10 | 0.000276 | 160640/160716 | 0.1469 | 5.3271 | 14.41 ||
val: {'recall': 0.997746, 'recall_grapheme': 0.996686, 'recall_vowel': 0.998384, 'recall_consonant': 0.99923, 'recall_word': 0.996459, 'acc_grapheme': 0.996785, 'acc_vowel': 0.998405, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997604, 'recall_grapheme': 0.996775, 'recall_vowel': 0.998181, 'recall_consonant': 0.998685, 'recall_word': 0.996225, 'acc_grapheme': 0.996586, 'acc_vowel': 0.998031, 'acc_consonant': 0.999028, 'acc_word': 0.996212, 'loss_grapheme': 0.016219, 'loss_vowel': 0.008863, 'loss_consonant': 0.004369, 'loss_word': 0.017492}
SWA>>>:


100%|██████████| 1255/1255 [07:10<00:00,  2.80it/s]



val: {'recall': 0.998613, 'recall_grapheme': 0.997923, 'recall_vowel': 0.998899, 'recall_consonant': 0.999706, 'recall_word': 0.997718, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998903, 'acc_consonant': 0.999526, 'acc_word': 0.997682, 'loss_grapheme': 0.011301, 'loss_vowel': 0.006171, 'loss_consonant': 0.002185, 'loss_word': 0.012525}
   12 | 0.000267 | 160640/160716 | 0.7925 | 5.2547 | 14.47 ||
val: {'recall': 0.997558, 'recall_grapheme': 0.996561, 'recall_vowel': 0.998552, 'recall_consonant': 0.99856, 'recall_word': 0.9962, 'acc_grapheme': 0.996536, 'acc_vowel': 0.998405, 'acc_consonant': 0.998779, 'acc_word': 0.996237, 'loss_grapheme': 0.016791, 'loss_vowel': 0.007631, 'loss_consonant': 0.004435, 'loss_word': 0.017615}
   13 | 0.000261 | 160640/160716 | 6.8214 | 5.5810 | 14.43 ||
val: {'recall': 0.997958, 'recall_grapheme': 0.997171, 'recall_vowel': 0.998488, 'recall_consonant': 0.999003, 'recall_word': 0.996637, 'acc_grapheme': 0.997059, 'acc_vowel': 0.998181, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997666, 'recall_grapheme': 0.996767, 'recall_vowel': 0.998333, 'recall_consonant': 0.998798, 'recall_word': 0.996232, 'acc_grapheme': 0.996635, 'acc_vowel': 0.99833, 'acc_consonant': 0.998754, 'acc_word': 0.996262, 'loss_grapheme': 0.018253, 'loss_vowel': 0.009038, 'loss_consonant': 0.005094, 'loss_word': 0.018969}
SWA>>>:


 90%|████████▉ | 1126/1255 [06:25<00:43,  2.94it/s]

In [73]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa...

val: {'recall': 0.998467, 'recall_grapheme': 0.99772, 'recall_vowel': 0.998871, 'recall_consonant': 0.999558, 'recall_word': 0.99743, 'acc_grapheme': 0.997582, 'acc_vowel': 0.998704, 'acc_consonant': 0.999477, 'acc_word': 0.997458, 'loss_grapheme': 0.012791, 'loss_vowel': 0.006768, 'loss_consonant': 0.002529, 'loss_word': 0.01347}

val: {'recall': 0.998671, 'recall_grapheme': 0.998067, 'recall_vowel': 0.998935, 'recall_consonant': 0.999613, 'recall_word': 0.997673, 'acc_grapheme': 0.997981, 'acc_vowel': 0.998829, 'acc_consonant': 0.999477, 'acc_word': 0.997632, 'loss_grapheme': 0.011121, 'loss_vowel': 0.006063, 'loss_consonant': 0.002289, 'loss_word': 0.012252}
CYCLE: 1
    0 | 

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997929, 'recall_grapheme': 0.996827, 'recall_vowel': 0.998736, 'recall_consonant': 0.999326, 'recall_word': 0.996394, 'acc_grapheme': 0.996885, 'acc_vowel': 0.99848, 'acc_consonant': 0.999178, 'acc_word': 0.996411, 'loss_grapheme': 0.01605, 'loss_vowel': 0.007821, 'loss_consonant': 0.003694, 'loss_word': 0.017375}
SWA>>>:


100%|██████████| 1255/1255 [07:05<00:00,  2.83it/s]



val: {'recall': 0.998568, 'recall_grapheme': 0.997907, 'recall_vowel': 0.998851, 'recall_consonant': 0.999607, 'recall_word': 0.997571, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998779, 'acc_consonant': 0.999452, 'acc_word': 0.997558, 'loss_grapheme': 0.011218, 'loss_vowel': 0.006183, 'loss_consonant': 0.00223, 'loss_word': 0.012368}
    3 | 0.000099 | 160640/160716 | 0.0395 | 5.0809 | 14.22 ||
val: {'recall': 0.998415, 'recall_grapheme': 0.9975, 'recall_vowel': 0.999015, 'recall_consonant': 0.999644, 'recall_word': 0.997189, 'acc_grapheme': 0.997458, 'acc_vowel': 0.998729, 'acc_consonant': 0.999477, 'acc_word': 0.997184, 'loss_grapheme': 0.014277, 'loss_vowel': 0.00677, 'loss_consonant': 0.002857, 'loss_word': 0.014951}
    4 | 0.000123 | 160640/160716 | 13.6762 | 5.1560 | 14.33 |
val: {'recall': 0.99774, 'recall_grapheme': 0.996756, 'recall_vowel': 0.998246, 'recall_consonant': 0.999204, 'recall_word': 0.996235, 'acc_grapheme': 0.99666, 'acc_vowel': 0.998455, 'acc_consonant': 0.99897

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997823, 'recall_grapheme': 0.996758, 'recall_vowel': 0.998445, 'recall_consonant': 0.999332, 'recall_word': 0.99619, 'acc_grapheme': 0.99671, 'acc_vowel': 0.99848, 'acc_consonant': 0.999202, 'acc_word': 0.996212, 'loss_grapheme': 0.016203, 'loss_vowel': 0.008032, 'loss_consonant': 0.003702, 'loss_word': 0.01693}
SWA>>>:


100%|██████████| 1255/1255 [07:09<00:00,  2.81it/s]



val: {'recall': 0.998574, 'recall_grapheme': 0.997924, 'recall_vowel': 0.998848, 'recall_consonant': 0.999601, 'recall_word': 0.997588, 'acc_grapheme': 0.997981, 'acc_vowel': 0.998829, 'acc_consonant': 0.999427, 'acc_word': 0.997558, 'loss_grapheme': 0.011157, 'loss_vowel': 0.006187, 'loss_consonant': 0.002199, 'loss_word': 0.012292}
    6 | 0.000169 | 160640/160716 | 7.2262 | 5.1593 | 14.42 ||
val: {'recall': 0.997885, 'recall_grapheme': 0.996916, 'recall_vowel': 0.998423, 'recall_consonant': 0.999285, 'recall_word': 0.996174, 'acc_grapheme': 0.99676, 'acc_vowel': 0.998255, 'acc_consonant': 0.999128, 'acc_word': 0.996237, 'loss_grapheme': 0.015642, 'loss_vowel': 0.008357, 'loss_consonant': 0.003476, 'loss_word': 0.017584}
    7 | 0.000191 | 160640/160716 | 12.8021 | 5.1535 | 14.45 |
val: {'recall': 0.99791, 'recall_grapheme': 0.996892, 'recall_vowel': 0.998532, 'recall_consonant': 0.999326, 'recall_word': 0.996456, 'acc_grapheme': 0.99691, 'acc_vowel': 0.998455, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99792, 'recall_grapheme': 0.99707, 'recall_vowel': 0.998497, 'recall_consonant': 0.999042, 'recall_word': 0.996734, 'acc_grapheme': 0.997059, 'acc_vowel': 0.998305, 'acc_consonant': 0.998854, 'acc_word': 0.996735, 'loss_grapheme': 0.014721, 'loss_vowel': 0.007574, 'loss_consonant': 0.004414, 'loss_word': 0.015663}
SWA>>>:


100%|██████████| 1255/1255 [07:10<00:00,  2.80it/s]



val: {'recall': 0.998605, 'recall_grapheme': 0.998002, 'recall_vowel': 0.998856, 'recall_consonant': 0.999562, 'recall_word': 0.997798, 'acc_grapheme': 0.998056, 'acc_vowel': 0.998829, 'acc_consonant': 0.999377, 'acc_word': 0.997782, 'loss_grapheme': 0.011096, 'loss_vowel': 0.006089, 'loss_consonant': 0.002204, 'loss_word': 0.012253}
    9 | 0.000233 | 160640/160716 | 6.6431 | 5.3578 | 14.50 ||
val: {'recall': 0.997706, 'recall_grapheme': 0.996497, 'recall_vowel': 0.998644, 'recall_consonant': 0.999186, 'recall_word': 0.995945, 'acc_grapheme': 0.996187, 'acc_vowel': 0.998455, 'acc_consonant': 0.998654, 'acc_word': 0.995913, 'loss_grapheme': 0.017225, 'loss_vowel': 0.008495, 'loss_consonant': 0.005929, 'loss_word': 0.018355}
   10 | 0.000230 | 160640/160716 | 5.6780 | 5.3569 | 14.48 ||
val: {'recall': 0.99699, 'recall_grapheme': 0.9955, 'recall_vowel': 0.998359, 'recall_consonant': 0.9986, 'recall_word': 0.995426, 'acc_grapheme': 0.995713, 'acc_vowel': 0.99823, 'acc_consonant': 0.99843

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997188, 'recall_grapheme': 0.995769, 'recall_vowel': 0.998227, 'recall_consonant': 0.998985, 'recall_word': 0.995359, 'acc_grapheme': 0.995963, 'acc_vowel': 0.997981, 'acc_consonant': 0.998729, 'acc_word': 0.995339, 'loss_grapheme': 0.019265, 'loss_vowel': 0.009449, 'loss_consonant': 0.005073, 'loss_word': 0.020637}
SWA>>>:


100%|██████████| 1255/1255 [07:14<00:00,  2.75it/s]



val: {'recall': 0.998647, 'recall_grapheme': 0.998052, 'recall_vowel': 0.998884, 'recall_consonant': 0.999599, 'recall_word': 0.997738, 'acc_grapheme': 0.998006, 'acc_vowel': 0.998854, 'acc_consonant': 0.999427, 'acc_word': 0.997732, 'loss_grapheme': 0.01104, 'loss_vowel': 0.006097, 'loss_consonant': 0.002218, 'loss_word': 0.012258}
   12 | 0.000222 | 160640/160716 | 16.2454 | 5.5822 | 14.48 |
val: {'recall': 0.9975, 'recall_grapheme': 0.99633, 'recall_vowel': 0.998262, 'recall_consonant': 0.999079, 'recall_word': 0.995858, 'acc_grapheme': 0.996262, 'acc_vowel': 0.998131, 'acc_consonant': 0.998804, 'acc_word': 0.995763, 'loss_grapheme': 0.018952, 'loss_vowel': 0.00942, 'loss_consonant': 0.004836, 'loss_word': 0.019586}
   13 | 0.000218 | 160640/160716 | 0.3368 | 5.3145 | 14.48 ||
val: {'recall': 0.997204, 'recall_grapheme': 0.996028, 'recall_vowel': 0.998272, 'recall_consonant': 0.99849, 'recall_word': 0.995943, 'acc_grapheme': 0.996237, 'acc_vowel': 0.998106, 'acc_consonant': 0.99880

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99792, 'recall_grapheme': 0.996964, 'recall_vowel': 0.998274, 'recall_consonant': 0.999478, 'recall_word': 0.996082, 'acc_grapheme': 0.99671, 'acc_vowel': 0.998405, 'acc_consonant': 0.999227, 'acc_word': 0.996037, 'loss_grapheme': 0.016199, 'loss_vowel': 0.008154, 'loss_consonant': 0.003945, 'loss_word': 0.016963}
SWA>>>:


100%|██████████| 1255/1255 [07:14<00:00,  2.73it/s]



val: {'recall': 0.998634, 'recall_grapheme': 0.998031, 'recall_vowel': 0.99884, 'recall_consonant': 0.999633, 'recall_word': 0.997796, 'acc_grapheme': 0.998006, 'acc_vowel': 0.998804, 'acc_consonant': 0.999452, 'acc_word': 0.997782, 'loss_grapheme': 0.011023, 'loss_vowel': 0.006025, 'loss_consonant': 0.002173, 'loss_word': 0.012158}
   15 | 0.000209 | 160640/160716 | 13.5670 | 5.3744 | 14.54 |
val: {'recall': 0.997963, 'recall_grapheme': 0.997155, 'recall_vowel': 0.998624, 'recall_consonant': 0.998917, 'recall_word': 0.996396, 'acc_grapheme': 0.99686, 'acc_vowel': 0.998405, 'acc_consonant': 0.998829, 'acc_word': 0.996386, 'loss_grapheme': 0.017125, 'loss_vowel': 0.008523, 'loss_consonant': 0.005554, 'loss_word': 0.018057}
   16 | 0.000204 | 160640/160716 | 6.4968 | 5.3727 | 14.51 ||
val: {'recall': 0.997707, 'recall_grapheme': 0.996683, 'recall_vowel': 0.998422, 'recall_consonant': 0.999042, 'recall_word': 0.99607, 'acc_grapheme': 0.996461, 'acc_vowel': 0.998206, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997826, 'recall_grapheme': 0.996922, 'recall_vowel': 0.998365, 'recall_consonant': 0.999096, 'recall_word': 0.996222, 'acc_grapheme': 0.996885, 'acc_vowel': 0.998355, 'acc_consonant': 0.998779, 'acc_word': 0.996262, 'loss_grapheme': 0.01677, 'loss_vowel': 0.008484, 'loss_consonant': 0.004376, 'loss_word': 0.018535}
SWA>>>:


100%|██████████| 1255/1255 [07:16<00:00,  2.84it/s]



val: {'recall': 0.998635, 'recall_grapheme': 0.998033, 'recall_vowel': 0.998839, 'recall_consonant': 0.999633, 'recall_word': 0.997759, 'acc_grapheme': 0.998031, 'acc_vowel': 0.998804, 'acc_consonant': 0.999452, 'acc_word': 0.997757, 'loss_grapheme': 0.011052, 'loss_vowel': 0.006, 'loss_consonant': 0.002137, 'loss_word': 0.012162}
   18 | 0.000193 | 160640/160716 | 0.8590 | 5.0676 | 14.54 ||
val: {'recall': 0.99774, 'recall_grapheme': 0.996462, 'recall_vowel': 0.998735, 'recall_consonant': 0.999299, 'recall_word': 0.996562, 'acc_grapheme': 0.99671, 'acc_vowel': 0.998505, 'acc_consonant': 0.999053, 'acc_word': 0.996511, 'loss_grapheme': 0.016121, 'loss_vowel': 0.007137, 'loss_consonant': 0.003849, 'loss_word': 0.016973}
   19 | 0.000188 | 160640/160716 | 0.6320 | 5.1046 | 14.50 ||
val: {'recall': 0.99815, 'recall_grapheme': 0.997473, 'recall_vowel': 0.998558, 'recall_consonant': 0.999097, 'recall_word': 0.996611, 'acc_grapheme': 0.997159, 'acc_vowel': 0.998579, 'acc_consonant': 0.99897

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997545, 'recall_grapheme': 0.996086, 'recall_vowel': 0.998577, 'recall_consonant': 0.99943, 'recall_word': 0.996166, 'acc_grapheme': 0.996162, 'acc_vowel': 0.99853, 'acc_consonant': 0.999153, 'acc_word': 0.996137, 'loss_grapheme': 0.018638, 'loss_vowel': 0.008447, 'loss_consonant': 0.004687, 'loss_word': 0.019072}
SWA>>>:


100%|██████████| 1255/1255 [07:12<00:00,  2.68it/s]



val: {'recall': 0.998677, 'recall_grapheme': 0.998053, 'recall_vowel': 0.998936, 'recall_consonant': 0.999667, 'recall_word': 0.997745, 'acc_grapheme': 0.998056, 'acc_vowel': 0.998829, 'acc_consonant': 0.999477, 'acc_word': 0.997732, 'loss_grapheme': 0.011116, 'loss_vowel': 0.005994, 'loss_consonant': 0.002174, 'loss_word': 0.012167}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   21 | 0.000176 | 160640/160716 | 5.8294 | 5.3271 | 14.50 ||
val: {'recall': 0.99806, 'recall_grapheme': 0.997219, 'recall_vowel': 0.998524, 'recall_consonant': 0.999279, 'recall_word': 0.996654, 'acc_grapheme': 0.99691, 'acc_vowel': 0.99848, 'acc_consonant': 0.998978, 'acc_word': 0.99666, 'loss_grapheme': 0.015699, 'loss_vowel': 0.007105, 'loss_consonant': 0.004086, 'loss_word': 0.016024}
   22 | 0.000170 | 160640/160716 | 8.3119 | 5.3733 | 14.50 ||
val: {'recall': 0.997961, 'recall_grapheme': 0.997014, 'recall_vowel': 0.998598, 'recall_consonant': 0.999218, 'recall_word': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997604, 'recall_grapheme': 0.996295, 'recall_vowel': 0.998396, 'recall_consonant': 0.999431, 'recall_word': 0.996437, 'acc_grapheme': 0.996436, 'acc_vowel': 0.998455, 'acc_consonant': 0.999277, 'acc_word': 0.996461, 'loss_grapheme': 0.017045, 'loss_vowel': 0.007897, 'loss_consonant': 0.003531, 'loss_word': 0.017318}
SWA>>>:


100%|██████████| 1255/1255 [07:13<00:00,  2.79it/s]



val: {'recall': 0.998621, 'recall_grapheme': 0.997951, 'recall_vowel': 0.998909, 'recall_consonant': 0.999673, 'recall_word': 0.997848, 'acc_grapheme': 0.997981, 'acc_vowel': 0.998779, 'acc_consonant': 0.999502, 'acc_word': 0.997832, 'loss_grapheme': 0.011083, 'loss_vowel': 0.005991, 'loss_consonant': 0.002155, 'loss_word': 0.012108}
   24 | 0.000157 | 160640/160716 | 0.4895 | 5.3430 | 14.49 ||
val: {'recall': 0.998313, 'recall_grapheme': 0.99758, 'recall_vowel': 0.99887, 'recall_consonant': 0.999222, 'recall_word': 0.997385, 'acc_grapheme': 0.997582, 'acc_vowel': 0.998729, 'acc_consonant': 0.999202, 'acc_word': 0.997408, 'loss_grapheme': 0.012997, 'loss_vowel': 0.006227, 'loss_consonant': 0.002802, 'loss_word': 0.013524}
   25 | 0.000151 | 160640/160716 | 6.4776 | 5.2904 | 14.44 ||
val: {'recall': 0.997704, 'recall_grapheme': 0.996643, 'recall_vowel': 0.998487, 'recall_consonant': 0.999042, 'recall_word': 0.99635, 'acc_grapheme': 0.996635, 'acc_vowel': 0.998455, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997827, 'recall_grapheme': 0.996792, 'recall_vowel': 0.998617, 'recall_consonant': 0.999108, 'recall_word': 0.996717, 'acc_grapheme': 0.997009, 'acc_vowel': 0.99853, 'acc_consonant': 0.999053, 'acc_word': 0.996685, 'loss_grapheme': 0.014325, 'loss_vowel': 0.007184, 'loss_consonant': 0.003566, 'loss_word': 0.015148}
SWA>>>:


100%|██████████| 1255/1255 [07:13<00:00,  2.86it/s]



val: {'recall': 0.998669, 'recall_grapheme': 0.998026, 'recall_vowel': 0.998947, 'recall_consonant': 0.999678, 'recall_word': 0.997777, 'acc_grapheme': 0.997981, 'acc_vowel': 0.998854, 'acc_consonant': 0.999526, 'acc_word': 0.997757, 'loss_grapheme': 0.011045, 'loss_vowel': 0.00599, 'loss_consonant': 0.002171, 'loss_word': 0.012043}
   27 | 0.000138 | 160640/160716 | 6.4620 | 5.1066 | 14.51 ||
val: {'recall': 0.997921, 'recall_grapheme': 0.996925, 'recall_vowel': 0.998639, 'recall_consonant': 0.999195, 'recall_word': 0.996466, 'acc_grapheme': 0.996835, 'acc_vowel': 0.998505, 'acc_consonant': 0.999078, 'acc_word': 0.996486, 'loss_grapheme': 0.015346, 'loss_vowel': 0.007633, 'loss_consonant': 0.003778, 'loss_word': 0.016378}
   28 | 0.000132 | 160640/160716 | 0.5793 | 5.3266 | 14.54 ||
val: {'recall': 0.998056, 'recall_grapheme': 0.997215, 'recall_vowel': 0.9985, 'recall_consonant': 0.999292, 'recall_word': 0.996847, 'acc_grapheme': 0.997134, 'acc_vowel': 0.998629, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997197, 'recall_grapheme': 0.99598, 'recall_vowel': 0.998239, 'recall_consonant': 0.998586, 'recall_word': 0.995552, 'acc_grapheme': 0.996237, 'acc_vowel': 0.998305, 'acc_consonant': 0.998878, 'acc_word': 0.995514, 'loss_grapheme': 0.019121, 'loss_vowel': 0.008618, 'loss_consonant': 0.004418, 'loss_word': 0.020419}
SWA>>>:


100%|██████████| 1255/1255 [07:12<00:00,  2.86it/s]



val: {'recall': 0.998637, 'recall_grapheme': 0.997964, 'recall_vowel': 0.998908, 'recall_consonant': 0.999712, 'recall_word': 0.997781, 'acc_grapheme': 0.997931, 'acc_vowel': 0.998804, 'acc_consonant': 0.999551, 'acc_word': 0.997757, 'loss_grapheme': 0.011091, 'loss_vowel': 0.005975, 'loss_consonant': 0.00215, 'loss_word': 0.012016}
   30 | 0.000118 | 160640/160716 | 7.0751 | 4.9872 | 14.52 ||
val: {'recall': 0.997728, 'recall_grapheme': 0.996649, 'recall_vowel': 0.998314, 'recall_consonant': 0.999299, 'recall_word': 0.996537, 'acc_grapheme': 0.99666, 'acc_vowel': 0.998505, 'acc_consonant': 0.999028, 'acc_word': 0.996511, 'loss_grapheme': 0.015987, 'loss_vowel': 0.007748, 'loss_consonant': 0.004148, 'loss_word': 0.017278}
   31 | 0.000112 | 160640/160716 | 0.2606 | 5.1547 | 14.50 ||
val: {'recall': 0.997877, 'recall_grapheme': 0.997057, 'recall_vowel': 0.998085, 'recall_consonant': 0.999312, 'recall_word': 0.996855, 'acc_grapheme': 0.997184, 'acc_vowel': 0.998355, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997735, 'recall_grapheme': 0.996718, 'recall_vowel': 0.998295, 'recall_consonant': 0.999207, 'recall_word': 0.99659, 'acc_grapheme': 0.996785, 'acc_vowel': 0.998455, 'acc_consonant': 0.998978, 'acc_word': 0.996611, 'loss_grapheme': 0.014797, 'loss_vowel': 0.007557, 'loss_consonant': 0.003945, 'loss_word': 0.015789}
SWA>>>:


100%|██████████| 1255/1255 [07:11<00:00,  2.65it/s]



val: {'recall': 0.998607, 'recall_grapheme': 0.997887, 'recall_vowel': 0.998908, 'recall_consonant': 0.999745, 'recall_word': 0.997838, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998804, 'acc_consonant': 0.999576, 'acc_word': 0.997807, 'loss_grapheme': 0.011003, 'loss_vowel': 0.005988, 'loss_consonant': 0.002185, 'loss_word': 0.011948}
   33 | 0.000099 | 160640/160716 | 4.2286 | 5.1531 | 14.51 ||
val: {'recall': 0.99755, 'recall_grapheme': 0.99639, 'recall_vowel': 0.998298, 'recall_consonant': 0.99912, 'recall_word': 0.99605, 'acc_grapheme': 0.99671, 'acc_vowel': 0.99838, 'acc_consonant': 0.998903, 'acc_word': 0.996087, 'loss_grapheme': 0.016542, 'loss_vowel': 0.008004, 'loss_consonant': 0.00457, 'loss_word': 0.018624}
   34 | 0.000093 | 160640/160716 | 0.2114 | 5.1639 | 14.50 ||
val: {'recall': 0.99799, 'recall_grapheme': 0.997104, 'recall_vowel': 0.998639, 'recall_consonant': 0.999113, 'recall_word': 0.996723, 'acc_grapheme': 0.997009, 'acc_vowel': 0.998654, 'acc_consonant': 0.998978,

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997746, 'recall_grapheme': 0.996595, 'recall_vowel': 0.998488, 'recall_consonant': 0.999307, 'recall_word': 0.996472, 'acc_grapheme': 0.99681, 'acc_vowel': 0.998455, 'acc_consonant': 0.999103, 'acc_word': 0.996486, 'loss_grapheme': 0.015331, 'loss_vowel': 0.007914, 'loss_consonant': 0.003679, 'loss_word': 0.016674}
SWA>>>:


100%|██████████| 1255/1255 [07:13<00:00,  2.69it/s]



val: {'recall': 0.998601, 'recall_grapheme': 0.997905, 'recall_vowel': 0.998885, 'recall_consonant': 0.999711, 'recall_word': 0.997782, 'acc_grapheme': 0.997882, 'acc_vowel': 0.998779, 'acc_consonant': 0.999551, 'acc_word': 0.997757, 'loss_grapheme': 0.011018, 'loss_vowel': 0.005959, 'loss_consonant': 0.002179, 'loss_word': 0.011939}
   37 | 0.000074 | 160640/160716 | 14.7694 | 4.9844 | 14.52 |
val: {'recall': 0.997874, 'recall_grapheme': 0.996762, 'recall_vowel': 0.99869, 'recall_consonant': 0.999282, 'recall_word': 0.996696, 'acc_grapheme': 0.996885, 'acc_vowel': 0.998554, 'acc_consonant': 0.999078, 'acc_word': 0.99671, 'loss_grapheme': 0.015923, 'loss_vowel': 0.007434, 'loss_consonant': 0.003739, 'loss_word': 0.016728}
   38 | 0.000068 | 160640/160716 | 6.6289 | 5.1155 | 14.53 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997745, 'recall_grapheme': 0.996513, 'recall_vowel': 0.998892, 'recall_consonant': 0.999063, 'recall_word': 0.996235, 'acc_grapheme': 0.99666, 'acc_vowel': 0.998629, 'acc_consonant': 0.999028, 'acc_word': 0.996212, 'loss_grapheme': 0.017023, 'loss_vowel': 0.008293, 'loss_consonant': 0.004125, 'loss_word': 0.018726}
SWA>>>:


100%|██████████| 1255/1255 [07:13<00:00,  2.75it/s]



val: {'recall': 0.99865, 'recall_grapheme': 0.998, 'recall_vowel': 0.998884, 'recall_consonant': 0.999717, 'recall_word': 0.997781, 'acc_grapheme': 0.997931, 'acc_vowel': 0.998804, 'acc_consonant': 0.999576, 'acc_word': 0.997757, 'loss_grapheme': 0.011035, 'loss_vowel': 0.005972, 'loss_consonant': 0.002188, 'loss_word': 0.011967}
   39 | 0.000063 | 160640/160716 | 0.3335 | 5.1259 | 14.54 ||
val: {'recall': 0.998234, 'recall_grapheme': 0.997406, 'recall_vowel': 0.998873, 'recall_consonant': 0.999253, 'recall_word': 0.997072, 'acc_grapheme': 0.997483, 'acc_vowel': 0.998754, 'acc_consonant': 0.999202, 'acc_word': 0.997109, 'loss_grapheme': 0.012875, 'loss_vowel': 0.006456, 'loss_consonant': 0.003111, 'loss_word': 0.013731}
   40 | 0.000057 | 160640/160716 | 5.1725 | 5.0304 | 14.51 ||
val: {'recall': 0.997948, 'recall_grapheme': 0.996947, 'recall_vowel': 0.998668, 'recall_consonant': 0.999227, 'recall_word': 0.996481, 'acc_grapheme': 0.99686, 'acc_vowel': 0.998579, 'acc_consonant': 0.9990

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998068, 'recall_grapheme': 0.997123, 'recall_vowel': 0.998645, 'recall_consonant': 0.999379, 'recall_word': 0.997084, 'acc_grapheme': 0.997134, 'acc_vowel': 0.998604, 'acc_consonant': 0.999277, 'acc_word': 0.997134, 'loss_grapheme': 0.013369, 'loss_vowel': 0.007044, 'loss_consonant': 0.00299, 'loss_word': 0.014333}
SWA>>>:


100%|██████████| 1255/1255 [07:13<00:00,  2.81it/s]



val: {'recall': 0.998663, 'recall_grapheme': 0.998025, 'recall_vowel': 0.998884, 'recall_consonant': 0.999717, 'recall_word': 0.997781, 'acc_grapheme': 0.997981, 'acc_vowel': 0.998804, 'acc_consonant': 0.999576, 'acc_word': 0.997757, 'loss_grapheme': 0.010994, 'loss_vowel': 0.005981, 'loss_consonant': 0.002189, 'loss_word': 0.011923}
   42 | 0.000046 | 160640/160716 | 9.6541 | 5.0548 | 14.48 ||
val: {'recall': 0.997971, 'recall_grapheme': 0.997023, 'recall_vowel': 0.998578, 'recall_consonant': 0.999261, 'recall_word': 0.996606, 'acc_grapheme': 0.996984, 'acc_vowel': 0.99853, 'acc_consonant': 0.999128, 'acc_word': 0.996586, 'loss_grapheme': 0.015175, 'loss_vowel': 0.00744, 'loss_consonant': 0.003526, 'loss_word': 0.016027}
   43 | 0.000041 | 160640/160716 | 0.4952 | 5.0505 | 14.52 ||
val: {'recall': 0.998173, 'recall_grapheme': 0.997281, 'recall_vowel': 0.998713, 'recall_consonant': 0.99942, 'recall_word': 0.997091, 'acc_grapheme': 0.997383, 'acc_vowel': 0.998654, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998031, 'recall_grapheme': 0.996973, 'recall_vowel': 0.998634, 'recall_consonant': 0.999546, 'recall_word': 0.996625, 'acc_grapheme': 0.996935, 'acc_vowel': 0.998679, 'acc_consonant': 0.999302, 'acc_word': 0.996635, 'loss_grapheme': 0.015263, 'loss_vowel': 0.007387, 'loss_consonant': 0.003292, 'loss_word': 0.016521}
SWA>>>:


100%|██████████| 1255/1255 [07:12<00:00,  2.86it/s]



val: {'recall': 0.998644, 'recall_grapheme': 0.997961, 'recall_vowel': 0.998903, 'recall_consonant': 0.999751, 'recall_word': 0.997802, 'acc_grapheme': 0.997956, 'acc_vowel': 0.998804, 'acc_consonant': 0.999601, 'acc_word': 0.997782, 'loss_grapheme': 0.011001, 'loss_vowel': 0.005979, 'loss_consonant': 0.002175, 'loss_word': 0.01192}
   45 | 0.000034 | 108544/160716 | 3.6602 | 5.2251 | 9.82 ||

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa...

val: {'recall': 0.998467, 'recall_grapheme': 0.99772, 'recall_vowel': 0.998871, 'recall_consonant': 0.999558, 'recall_word': 0.99743, 'acc_grapheme': 0.997582, 'acc_vowel': 0.998704, 'acc_consonant': 0.999477, 'acc_word': 0.997458, 'loss_grapheme': 0.012791, 'loss_vowel': 0.006768, 'loss_consonant': 0.002529, 'loss_word': 0.01347}

val: {'recall': 0.998648, 'recall_grapheme': 0.998048, 'recall_vowel': 0.998873, 'recall_consonant': 0.999623, 'recall_word': 0.997515, 'acc_grapheme': 0.997931, 'acc_vowel': 0.998704, 'acc_consonant': 0.999402, 'acc_word': 0.997483, 'loss_grapheme': 0.011498, 'loss_vowel': 0.00635, 'loss_consonant': 0.002381, 'loss_word': 0.012641}
CYCLE: 1
   14 | 0

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997934, 'recall_grapheme': 0.997128, 'recall_vowel': 0.998158, 'recall_consonant': 0.999324, 'recall_word': 0.996713, 'acc_grapheme': 0.99686, 'acc_vowel': 0.99843, 'acc_consonant': 0.999028, 'acc_word': 0.99671, 'loss_grapheme': 0.014749, 'loss_vowel': 0.007822, 'loss_consonant': 0.003959, 'loss_word': 0.015996}
SWA>>>:


100%|██████████| 1255/1255 [07:12<00:00,  2.93it/s]



val: {'recall': 0.998589, 'recall_grapheme': 0.997942, 'recall_vowel': 0.998871, 'recall_consonant': 0.999601, 'recall_word': 0.997536, 'acc_grapheme': 0.997832, 'acc_vowel': 0.998704, 'acc_consonant': 0.999427, 'acc_word': 0.997508, 'loss_grapheme': 0.011495, 'loss_vowel': 0.006407, 'loss_consonant': 0.002351, 'loss_word': 0.012667}
   16 | 0.000222 | 160640/160716 | 5.9235 | 5.1297 | 14.50 ||
val: {'recall': 0.997779, 'recall_grapheme': 0.996753, 'recall_vowel': 0.998259, 'recall_consonant': 0.999351, 'recall_word': 0.996096, 'acc_grapheme': 0.996785, 'acc_vowel': 0.998355, 'acc_consonant': 0.999128, 'acc_word': 0.996137, 'loss_grapheme': 0.016279, 'loss_vowel': 0.008563, 'loss_consonant': 0.004194, 'loss_word': 0.017798}
   17 | 0.000214 | 160640/160716 | 5.4592 | 5.3904 | 14.49 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997841, 'recall_grapheme': 0.996981, 'recall_vowel': 0.998284, 'recall_consonant': 0.999117, 'recall_word': 0.996532, 'acc_grapheme': 0.996835, 'acc_vowel': 0.99828, 'acc_consonant': 0.999103, 'acc_word': 0.996611, 'loss_grapheme': 0.015392, 'loss_vowel': 0.008287, 'loss_consonant': 0.003979, 'loss_word': 0.016326}
SWA>>>:


100%|██████████| 1255/1255 [07:10<00:00,  2.89it/s]



val: {'recall': 0.998594, 'recall_grapheme': 0.997931, 'recall_vowel': 0.998906, 'recall_consonant': 0.999607, 'recall_word': 0.997449, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998754, 'acc_consonant': 0.999452, 'acc_word': 0.997433, 'loss_grapheme': 0.011374, 'loss_vowel': 0.006303, 'loss_consonant': 0.002279, 'loss_word': 0.012532}
   18 | 0.000205 | 160640/160716 | 10.3424 | 5.1799 | 14.54 |
val: {'recall': 0.99775, 'recall_grapheme': 0.996814, 'recall_vowel': 0.998338, 'recall_consonant': 0.999034, 'recall_word': 0.996463, 'acc_grapheme': 0.996511, 'acc_vowel': 0.99833, 'acc_consonant': 0.998903, 'acc_word': 0.996511, 'loss_grapheme': 0.016413, 'loss_vowel': 0.008755, 'loss_consonant': 0.004326, 'loss_word': 0.017831}
   19 | 0.000196 | 160640/160716 | 1.5417 | 5.3063 | 14.54 ||
val: {'recall': 0.997805, 'recall_grapheme': 0.996718, 'recall_vowel': 0.998455, 'recall_consonant': 0.999332, 'recall_word': 0.996169, 'acc_grapheme': 0.99671, 'acc_vowel': 0.998455, 'acc_consonant': 0.99

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 1255/1255 [07:12<00:00,  2.91it/s]



val: {'recall': 0.998603, 'recall_grapheme': 0.997946, 'recall_vowel': 0.998913, 'recall_consonant': 0.999607, 'recall_word': 0.997618, 'acc_grapheme': 0.997906, 'acc_vowel': 0.998779, 'acc_consonant': 0.999452, 'acc_word': 0.997582, 'loss_grapheme': 0.011231, 'loss_vowel': 0.006189, 'loss_consonant': 0.00226, 'loss_word': 0.012382}
   21 | 0.000178 | 160640/160716 | 8.1772 | 5.2105 | 14.58 ||
val: {'recall': 0.997631, 'recall_grapheme': 0.996458, 'recall_vowel': 0.99835, 'recall_consonant': 0.999259, 'recall_word': 0.995943, 'acc_grapheme': 0.996586, 'acc_vowel': 0.99833, 'acc_consonant': 0.998903, 'acc_word': 0.995987, 'loss_grapheme': 0.016753, 'loss_vowel': 0.00862, 'loss_consonant': 0.004507, 'loss_word': 0.01891}
   22 | 0.000169 | 160640/160716 | 0.1809 | 5.3621 | 14.56 ||
val: {'recall': 0.997651, 'recall_grapheme': 0.996472, 'recall_vowel': 0.998477, 'recall_consonant': 0.999185, 'recall_word': 0.996148, 'acc_grapheme': 0.996361, 'acc_vowel': 0.998405, 'acc_consonant': 0.9990

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997909, 'recall_grapheme': 0.996883, 'recall_vowel': 0.998679, 'recall_consonant': 0.99919, 'recall_word': 0.996454, 'acc_grapheme': 0.996835, 'acc_vowel': 0.998604, 'acc_consonant': 0.999003, 'acc_word': 0.996486, 'loss_grapheme': 0.015161, 'loss_vowel': 0.006839, 'loss_consonant': 0.004194, 'loss_word': 0.015728}
SWA>>>:


100%|██████████| 1255/1255 [07:18<00:00,  2.75it/s]



val: {'recall': 0.998671, 'recall_grapheme': 0.998067, 'recall_vowel': 0.998935, 'recall_consonant': 0.999613, 'recall_word': 0.997673, 'acc_grapheme': 0.997981, 'acc_vowel': 0.998829, 'acc_consonant': 0.999477, 'acc_word': 0.997632, 'loss_grapheme': 0.011121, 'loss_vowel': 0.006063, 'loss_consonant': 0.002289, 'loss_word': 0.012252}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   24 | 0.000150 | 160640/160716 | 10.0296 | 5.5424 | 14.56 |
val: {'recall': 0.997565, 'recall_grapheme': 0.996235, 'recall_vowel': 0.998493, 'recall_consonant': 0.999299, 'recall_word': 0.995786, 'acc_grapheme': 0.996187, 'acc_vowel': 0.998305, 'acc_consonant': 0.999028, 'acc_word': 0.995788, 'loss_grapheme': 0.018083, 'loss_vowel': 0.008531, 'loss_consonant': 0.004728, 'loss_word': 0.019326}
   25 | 0.000141 | 160640/160716 | 5.8276 | 5.3461 | 14.60 ||
val: {'recall': 0.997903, 'recall_grapheme': 0.996834, 'recall_vowel': 0.998681, 'recall_consonant': 0.999262, 'recall_word': 

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997982, 'recall_grapheme': 0.997164, 'recall_vowel': 0.998331, 'recall_consonant': 0.999268, 'recall_word': 0.996433, 'acc_grapheme': 0.996984, 'acc_vowel': 0.99838, 'acc_consonant': 0.999053, 'acc_word': 0.996436, 'loss_grapheme': 0.015066, 'loss_vowel': 0.008072, 'loss_consonant': 0.004001, 'loss_word': 0.016695}
SWA>>>:


100%|██████████| 1255/1255 [07:13<00:00,  2.82it/s]



val: {'recall': 0.998645, 'recall_grapheme': 0.998016, 'recall_vowel': 0.998935, 'recall_consonant': 0.999613, 'recall_word': 0.99763, 'acc_grapheme': 0.997956, 'acc_vowel': 0.998829, 'acc_consonant': 0.999477, 'acc_word': 0.997607, 'loss_grapheme': 0.011158, 'loss_vowel': 0.006084, 'loss_consonant': 0.002287, 'loss_word': 0.012287}
   27 | 0.000122 | 160640/160716 | 6.4880 | 5.0670 | 14.61 ||
val: {'recall': 0.998084, 'recall_grapheme': 0.997065, 'recall_vowel': 0.998993, 'recall_consonant': 0.999213, 'recall_word': 0.996894, 'acc_grapheme': 0.996984, 'acc_vowel': 0.998679, 'acc_consonant': 0.999028, 'acc_word': 0.99691, 'loss_grapheme': 0.013763, 'loss_vowel': 0.006327, 'loss_consonant': 0.003847, 'loss_word': 0.013997}
   28 | 0.000113 | 160640/160716 | 7.2772 | 5.1226 | 14.60 ||
val: {'recall': 0.997667, 'recall_grapheme': 0.996463, 'recall_vowel': 0.998454, 'recall_consonant': 0.999288, 'recall_word': 0.996373, 'acc_grapheme': 0.996536, 'acc_vowel': 0.998505, 'acc_consonant': 0.9

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997972, 'recall_grapheme': 0.996977, 'recall_vowel': 0.998579, 'recall_consonant': 0.999355, 'recall_word': 0.99664, 'acc_grapheme': 0.997059, 'acc_vowel': 0.99848, 'acc_consonant': 0.999178, 'acc_word': 0.996685, 'loss_grapheme': 0.014802, 'loss_vowel': 0.007591, 'loss_consonant': 0.003747, 'loss_word': 0.016135}
SWA>>>:


100%|██████████| 1255/1255 [07:18<00:00,  2.88it/s]



val: {'recall': 0.998645, 'recall_grapheme': 0.998016, 'recall_vowel': 0.998935, 'recall_consonant': 0.999613, 'recall_word': 0.997595, 'acc_grapheme': 0.997956, 'acc_vowel': 0.998829, 'acc_consonant': 0.999477, 'acc_word': 0.997582, 'loss_grapheme': 0.011158, 'loss_vowel': 0.006139, 'loss_consonant': 0.002295, 'loss_word': 0.012315}
   30 | 0.000095 | 160640/160716 | 14.4458 | 5.0728 | 14.60 |
val: {'recall': 0.99779, 'recall_grapheme': 0.996688, 'recall_vowel': 0.998344, 'recall_consonant': 0.999441, 'recall_word': 0.996278, 'acc_grapheme': 0.99676, 'acc_vowel': 0.99838, 'acc_consonant': 0.999202, 'acc_word': 0.996336, 'loss_grapheme': 0.015519, 'loss_vowel': 0.008178, 'loss_consonant': 0.003434, 'loss_word': 0.016567}
   31 | 0.000086 | 160640/160716 | 15.0001 | 5.1902 | 14.59 |
val: {'recall': 0.998016, 'recall_grapheme': 0.997071, 'recall_vowel': 0.998591, 'recall_consonant': 0.99933, 'recall_word': 0.996361, 'acc_grapheme': 0.997034, 'acc_vowel': 0.99838, 'acc_consonant': 0.9991

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998138, 'recall_grapheme': 0.997253, 'recall_vowel': 0.998715, 'recall_consonant': 0.999331, 'recall_word': 0.996983, 'acc_grapheme': 0.997134, 'acc_vowel': 0.99848, 'acc_consonant': 0.999178, 'acc_word': 0.997059, 'loss_grapheme': 0.014476, 'loss_vowel': 0.007179, 'loss_consonant': 0.003493, 'loss_word': 0.015795}
SWA>>>:


100%|██████████| 1255/1255 [07:15<00:00,  2.85it/s]



val: {'recall': 0.998649, 'recall_grapheme': 0.998024, 'recall_vowel': 0.998935, 'recall_consonant': 0.999613, 'recall_word': 0.997574, 'acc_grapheme': 0.997956, 'acc_vowel': 0.998829, 'acc_consonant': 0.999477, 'acc_word': 0.997558, 'loss_grapheme': 0.0111, 'loss_vowel': 0.006074, 'loss_consonant': 0.002277, 'loss_word': 0.012222}
   33 | 0.000070 | 160640/160716 | 4.2696 | 5.0962 | 14.59 ||
val: {'recall': 0.997711, 'recall_grapheme': 0.996661, 'recall_vowel': 0.998127, 'recall_consonant': 0.999396, 'recall_word': 0.996236, 'acc_grapheme': 0.996785, 'acc_vowel': 0.998305, 'acc_consonant': 0.999227, 'acc_word': 0.996311, 'loss_grapheme': 0.015705, 'loss_vowel': 0.007955, 'loss_consonant': 0.003737, 'loss_word': 0.016994}
   34 | 0.000062 | 160640/160716 | 5.6416 | 4.9282 | 14.65 ||
val: {'recall': 0.997979, 'recall_grapheme': 0.997111, 'recall_vowel': 0.998403, 'recall_consonant': 0.99929, 'recall_word': 0.996401, 'acc_grapheme': 0.996835, 'acc_vowel': 0.998455, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997466, 'recall_grapheme': 0.996239, 'recall_vowel': 0.997975, 'recall_consonant': 0.999411, 'recall_word': 0.996281, 'acc_grapheme': 0.996611, 'acc_vowel': 0.998305, 'acc_consonant': 0.999178, 'acc_word': 0.996361, 'loss_grapheme': 0.016078, 'loss_vowel': 0.008395, 'loss_consonant': 0.003901, 'loss_word': 0.017902}
SWA>>>:


100%|██████████| 1255/1255 [07:15<00:00,  2.87it/s]



val: {'recall': 0.998629, 'recall_grapheme': 0.997998, 'recall_vowel': 0.998907, 'recall_consonant': 0.999613, 'recall_word': 0.997644, 'acc_grapheme': 0.997956, 'acc_vowel': 0.998804, 'acc_consonant': 0.999477, 'acc_word': 0.997632, 'loss_grapheme': 0.011099, 'loss_vowel': 0.006072, 'loss_consonant': 0.002283, 'loss_word': 0.012234}
   36 | 0.000047 | 160640/160716 | 0.1572 | 4.8740 | 14.62 ||
val: {'recall': 0.998092, 'recall_grapheme': 0.997225, 'recall_vowel': 0.998601, 'recall_consonant': 0.999316, 'recall_word': 0.996614, 'acc_grapheme': 0.997184, 'acc_vowel': 0.99853, 'acc_consonant': 0.999103, 'acc_word': 0.99671, 'loss_grapheme': 0.01443, 'loss_vowel': 0.007236, 'loss_consonant': 0.003541, 'loss_word': 0.016065}
   37 | 0.000041 | 160640/160716 | 10.6130 | 5.0171 | 14.58 |
val: {'recall': 0.997979, 'recall_grapheme': 0.997051, 'recall_vowel': 0.998485, 'recall_consonant': 0.999327, 'recall_word': 0.996642, 'acc_grapheme': 0.997109, 'acc_vowel': 0.998604, 'acc_consonant': 0.99

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997757, 'recall_grapheme': 0.99677, 'recall_vowel': 0.998158, 'recall_consonant': 0.99933, 'recall_word': 0.996466, 'acc_grapheme': 0.996835, 'acc_vowel': 0.998505, 'acc_consonant': 0.999202, 'acc_word': 0.996511, 'loss_grapheme': 0.015931, 'loss_vowel': 0.008076, 'loss_consonant': 0.003841, 'loss_word': 0.017682}
SWA>>>:


100%|██████████| 1255/1255 [07:17<00:00,  2.75it/s]


   39 | 0.000029 | 160640/160716 | 5.5000 | 4.9362 | 14.54 ||
val: {'recall': 0.998016, 'recall_grapheme': 0.997107, 'recall_vowel': 0.998442, 'recall_consonant': 0.999409, 'recall_word': 0.996583, 'acc_grapheme': 0.997034, 'acc_vowel': 0.99848, 'acc_consonant': 0.999302, 'acc_word': 0.996635, 'loss_grapheme': 0.015601, 'loss_vowel': 0.007675, 'loss_consonant': 0.003668, 'loss_word': 0.017253}
   40 | 0.000023 | 160640/160716 | 0.2301 | 5.0696 | 14.56 ||
val: {'recall': 0.99831, 'recall_grapheme': 0.997554, 'recall_vowel': 0.99866, 'recall_consonant': 0.999473, 'recall_word': 0.997267, 'acc_grapheme': 0.997632, 'acc_vowel': 0.998704, 'acc_consonant': 0.999327, 'acc_word': 0.997283, 'loss_grapheme': 0.012165, 'loss_vowel': 0.006354, 'loss_consonant': 0.002903, 'loss_word': 0.013325}
   41 | 0.000019 | 160640/160716 | 4.6941 | 5.0980 | 14.58 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997915, 'recall_grapheme': 0.996847, 'recall_vowel': 0.998599, 'recall_consonant': 0.99937, 'recall_word': 0.996576, 'acc_grapheme': 0.996935, 'acc_vowel': 0.998505, 'acc_consonant': 0.999227, 'acc_word': 0.996611, 'loss_grapheme': 0.015353, 'loss_vowel': 0.007471, 'loss_consonant': 0.00361, 'loss_word': 0.016937}
SWA>>>:


 21%|██▏       | 269/1255 [01:36<05:38,  2.91it/s]

KeyboardInterrupt: 

In [39]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa...

val: {'recall': 0.998314, 'recall_grapheme': 0.997668, 'recall_vowel': 0.998427, 'recall_consonant': 0.99949, 'recall_word': 0.99722, 'acc_grapheme': 0.997333, 'acc_vowel': 0.998654, 'acc_consonant': 0.999302, 'acc_word': 0.997234, 'loss_grapheme': 0.012546, 'loss_vowel': 0.006781, 'loss_consonant': 0.002767, 'loss_word': 0.013686}

val: {'recall': 0.998499, 'recall_grapheme': 0.997926, 'recall_vowel': 0.998543, 'recall_consonant': 0.999602, 'recall_word': 0.997673, 'acc_grapheme': 0.997882, 'acc_vowel': 0.998779, 'acc_consonant': 0.999427, 'acc_word': 0.997657, 'loss_grapheme': 0.010988, 'loss_vowel': 0.006149, 'loss_consonant': 0.00255, 'loss_word': 0.011906}
CYCLE: 1
    8 | 

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997023, 'recall_grapheme': 0.99578, 'recall_vowel': 0.997481, 'recall_consonant': 0.999051, 'recall_word': 0.995257, 'acc_grapheme': 0.995788, 'acc_vowel': 0.997882, 'acc_consonant': 0.998928, 'acc_word': 0.995265, 'loss_grapheme': 0.019129, 'loss_vowel': 0.010337, 'loss_consonant': 0.005143, 'loss_word': 0.020291}
SWA>>>:


100%|██████████| 1255/1255 [08:40<00:00,  2.45it/s]



val: {'recall': 0.997864, 'recall_grapheme': 0.997037, 'recall_vowel': 0.998158, 'recall_consonant': 0.999223, 'recall_word': 0.996839, 'acc_grapheme': 0.997084, 'acc_vowel': 0.998405, 'acc_consonant': 0.999103, 'acc_word': 0.99681, 'loss_grapheme': 0.013349, 'loss_vowel': 0.007275, 'loss_consonant': 0.003111, 'loss_word': 0.014714}
   10 | 0.000266 | 160640/160716 | 0.5633 | 5.5519 | 16.03 ||
val: {'recall': 0.997647, 'recall_grapheme': 0.996431, 'recall_vowel': 0.998337, 'recall_consonant': 0.999389, 'recall_word': 0.996037, 'acc_grapheme': 0.996461, 'acc_vowel': 0.99843, 'acc_consonant': 0.998978, 'acc_word': 0.996062, 'loss_grapheme': 0.016803, 'loss_vowel': 0.008233, 'loss_consonant': 0.004633, 'loss_word': 0.017464}
   11 | 0.000259 | 160640/160716 | 13.2253 | 5.4043 | 16.07 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997343, 'recall_grapheme': 0.996182, 'recall_vowel': 0.998469, 'recall_consonant': 0.99854, 'recall_word': 0.995881, 'acc_grapheme': 0.996037, 'acc_vowel': 0.99838, 'acc_consonant': 0.998903, 'acc_word': 0.995838, 'loss_grapheme': 0.019492, 'loss_vowel': 0.009028, 'loss_consonant': 0.005488, 'loss_word': 0.020716}
SWA>>>:


100%|██████████| 1255/1255 [08:32<00:00,  2.43it/s]



val: {'recall': 0.998228, 'recall_grapheme': 0.997371, 'recall_vowel': 0.998693, 'recall_consonant': 0.999476, 'recall_word': 0.997346, 'acc_grapheme': 0.997508, 'acc_vowel': 0.998729, 'acc_consonant': 0.999402, 'acc_word': 0.997333, 'loss_grapheme': 0.01217, 'loss_vowel': 0.006704, 'loss_consonant': 0.002375, 'loss_word': 0.013331}
   12 | 0.000253 | 160640/160716 | 6.3436 | 5.6027 | 16.06 ||
val: {'recall': 0.997346, 'recall_grapheme': 0.996081, 'recall_vowel': 0.99841, 'recall_consonant': 0.998815, 'recall_word': 0.995531, 'acc_grapheme': 0.996262, 'acc_vowel': 0.998131, 'acc_consonant': 0.998629, 'acc_word': 0.995564, 'loss_grapheme': 0.017862, 'loss_vowel': 0.008967, 'loss_consonant': 0.004795, 'loss_word': 0.019365}
   13 | 0.000246 | 160640/160716 | 14.4517 | 5.5571 | 15.99 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99736, 'recall_grapheme': 0.995899, 'recall_vowel': 0.99826, 'recall_consonant': 0.999382, 'recall_word': 0.996185, 'acc_grapheme': 0.995987, 'acc_vowel': 0.99823, 'acc_consonant': 0.999078, 'acc_word': 0.996187, 'loss_grapheme': 0.017413, 'loss_vowel': 0.008114, 'loss_consonant': 0.004311, 'loss_word': 0.017833}
SWA>>>:


100%|██████████| 1255/1255 [08:37<00:00,  2.32it/s]



val: {'recall': 0.998332, 'recall_grapheme': 0.997375, 'recall_vowel': 0.998917, 'recall_consonant': 0.999661, 'recall_word': 0.997426, 'acc_grapheme': 0.997558, 'acc_vowel': 0.998878, 'acc_consonant': 0.999452, 'acc_word': 0.997383, 'loss_grapheme': 0.011828, 'loss_vowel': 0.006312, 'loss_consonant': 0.002348, 'loss_word': 0.012956}
   14 | 0.000238 | 160640/160716 | 0.3471 | 5.5145 | 16.05 ||
val: {'recall': 0.99784, 'recall_grapheme': 0.996568, 'recall_vowel': 0.99885, 'recall_consonant': 0.999372, 'recall_word': 0.996157, 'acc_grapheme': 0.996386, 'acc_vowel': 0.99853, 'acc_consonant': 0.999128, 'acc_word': 0.996187, 'loss_grapheme': 0.0159, 'loss_vowel': 0.007829, 'loss_consonant': 0.003677, 'loss_word': 0.016348}
   15 | 0.000230 | 160640/160716 | 1.8412 | 5.4672 | 16.06 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.996671, 'recall_grapheme': 0.995455, 'recall_vowel': 0.997886, 'recall_consonant': 0.997888, 'recall_word': 0.995511, 'acc_grapheme': 0.995713, 'acc_vowel': 0.998206, 'acc_consonant': 0.998629, 'acc_word': 0.995589, 'loss_grapheme': 0.018582, 'loss_vowel': 0.008699, 'loss_consonant': 0.00577, 'loss_word': 0.019296}
SWA>>>:


100%|██████████| 1255/1255 [08:32<00:00,  2.30it/s]



val: {'recall': 0.998269, 'recall_grapheme': 0.997405, 'recall_vowel': 0.998612, 'recall_consonant': 0.999655, 'recall_word': 0.997403, 'acc_grapheme': 0.997558, 'acc_vowel': 0.998754, 'acc_consonant': 0.999427, 'acc_word': 0.997408, 'loss_grapheme': 0.011518, 'loss_vowel': 0.006268, 'loss_consonant': 0.002487, 'loss_word': 0.012579}
   16 | 0.000222 | 160640/160716 | 2.6604 | 5.5776 | 16.06 ||
val: {'recall': 0.99736, 'recall_grapheme': 0.996393, 'recall_vowel': 0.998284, 'recall_consonant': 0.998369, 'recall_word': 0.996448, 'acc_grapheme': 0.996486, 'acc_vowel': 0.998355, 'acc_consonant': 0.998829, 'acc_word': 0.996436, 'loss_grapheme': 0.01643, 'loss_vowel': 0.008154, 'loss_consonant': 0.004427, 'loss_word': 0.017147}
   17 | 0.000214 | 160640/160716 | 15.8123 | 5.0732 | 16.11 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997399, 'recall_grapheme': 0.996385, 'recall_vowel': 0.998242, 'recall_consonant': 0.998584, 'recall_word': 0.996009, 'acc_grapheme': 0.996386, 'acc_vowel': 0.99833, 'acc_consonant': 0.998804, 'acc_word': 0.996037, 'loss_grapheme': 0.017644, 'loss_vowel': 0.007433, 'loss_consonant': 0.004672, 'loss_word': 0.018}
SWA>>>:


100%|██████████| 1255/1255 [08:43<00:00,  2.43it/s]



val: {'recall': 0.998407, 'recall_grapheme': 0.99764, 'recall_vowel': 0.998681, 'recall_consonant': 0.999666, 'recall_word': 0.997379, 'acc_grapheme': 0.997707, 'acc_vowel': 0.998754, 'acc_consonant': 0.999477, 'acc_word': 0.997358, 'loss_grapheme': 0.01134, 'loss_vowel': 0.005986, 'loss_consonant': 0.002358, 'loss_word': 0.012217}
   18 | 0.000205 | 160640/160716 | 11.9932 | 5.2639 | 16.12 |
val: {'recall': 0.997502, 'recall_grapheme': 0.996274, 'recall_vowel': 0.998148, 'recall_consonant': 0.999315, 'recall_word': 0.996095, 'acc_grapheme': 0.996287, 'acc_vowel': 0.998405, 'acc_consonant': 0.999103, 'acc_word': 0.996087, 'loss_grapheme': 0.017495, 'loss_vowel': 0.008109, 'loss_consonant': 0.004539, 'loss_word': 0.017828}
   19 | 0.000196 | 160640/160716 | 12.1270 | 5.3155 | 16.08 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997252, 'recall_grapheme': 0.996814, 'recall_vowel': 0.997523, 'recall_consonant': 0.997854, 'recall_word': 0.995928, 'acc_grapheme': 0.996536, 'acc_vowel': 0.997582, 'acc_consonant': 0.998953, 'acc_word': 0.995663, 'loss_grapheme': 0.017319, 'loss_vowel': 0.010803, 'loss_consonant': 0.004248, 'loss_word': 0.018817}
SWA>>>:


100%|██████████| 1255/1255 [08:28<00:00,  2.39it/s]



val: {'recall': 0.998456, 'recall_grapheme': 0.997696, 'recall_vowel': 0.998795, 'recall_consonant': 0.999639, 'recall_word': 0.99746, 'acc_grapheme': 0.997732, 'acc_vowel': 0.998804, 'acc_consonant': 0.999477, 'acc_word': 0.997433, 'loss_grapheme': 0.011272, 'loss_vowel': 0.005925, 'loss_consonant': 0.002281, 'loss_word': 0.012134}
   20 | 0.000187 | 160640/160716 | 0.0737 | 5.3117 | 16.05 ||
val: {'recall': 0.997434, 'recall_grapheme': 0.995993, 'recall_vowel': 0.998636, 'recall_consonant': 0.999117, 'recall_word': 0.995592, 'acc_grapheme': 0.996162, 'acc_vowel': 0.998405, 'acc_consonant': 0.998928, 'acc_word': 0.995564, 'loss_grapheme': 0.017545, 'loss_vowel': 0.008295, 'loss_consonant': 0.003748, 'loss_word': 0.018542}
   21 | 0.000178 | 160640/160716 | 3.6846 | 5.2597 | 16.05 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997413, 'recall_grapheme': 0.996226, 'recall_vowel': 0.998158, 'recall_consonant': 0.999041, 'recall_word': 0.995715, 'acc_grapheme': 0.996212, 'acc_vowel': 0.99833, 'acc_consonant': 0.998854, 'acc_word': 0.995788, 'loss_grapheme': 0.018031, 'loss_vowel': 0.009006, 'loss_consonant': 0.004541, 'loss_word': 0.019222}
SWA>>>:


100%|██████████| 1255/1255 [08:41<00:00,  2.32it/s]



val: {'recall': 0.998482, 'recall_grapheme': 0.997753, 'recall_vowel': 0.998778, 'recall_consonant': 0.999644, 'recall_word': 0.997521, 'acc_grapheme': 0.997732, 'acc_vowel': 0.998779, 'acc_consonant': 0.999502, 'acc_word': 0.997508, 'loss_grapheme': 0.011234, 'loss_vowel': 0.005853, 'loss_consonant': 0.002295, 'loss_word': 0.012114}
   22 | 0.000169 | 160640/160716 | 16.0230 | 5.2556 | 16.11 |
val: {'recall': 0.99773, 'recall_grapheme': 0.996721, 'recall_vowel': 0.998459, 'recall_consonant': 0.999021, 'recall_word': 0.996348, 'acc_grapheme': 0.996536, 'acc_vowel': 0.998505, 'acc_consonant': 0.999028, 'acc_word': 0.996386, 'loss_grapheme': 0.016491, 'loss_vowel': 0.008414, 'loss_consonant': 0.004069, 'loss_word': 0.01733}
   23 | 0.000159 | 160640/160716 | 0.4771 | 5.1352 | 16.10 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997767, 'recall_grapheme': 0.996743, 'recall_vowel': 0.998276, 'recall_consonant': 0.999305, 'recall_word': 0.996555, 'acc_grapheme': 0.996685, 'acc_vowel': 0.99843, 'acc_consonant': 0.999078, 'acc_word': 0.996536, 'loss_grapheme': 0.015842, 'loss_vowel': 0.007957, 'loss_consonant': 0.004414, 'loss_word': 0.01687}
SWA>>>:


100%|██████████| 1255/1255 [08:38<00:00,  2.38it/s]



val: {'recall': 0.998518, 'recall_grapheme': 0.997844, 'recall_vowel': 0.998708, 'recall_consonant': 0.999678, 'recall_word': 0.997684, 'acc_grapheme': 0.997757, 'acc_vowel': 0.998779, 'acc_consonant': 0.999526, 'acc_word': 0.997657, 'loss_grapheme': 0.011229, 'loss_vowel': 0.00584, 'loss_consonant': 0.002278, 'loss_word': 0.012071}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   24 | 0.000150 | 160640/160716 | 14.5408 | 5.2286 | 16.14 |
val: {'recall': 0.997873, 'recall_grapheme': 0.99689, 'recall_vowel': 0.998314, 'recall_consonant': 0.999399, 'recall_word': 0.996185, 'acc_grapheme': 0.99666, 'acc_vowel': 0.99848, 'acc_consonant': 0.999128, 'acc_word': 0.996187, 'loss_grapheme': 0.015515, 'loss_vowel': 0.007073, 'loss_consonant': 0.003943, 'loss_word': 0.015717}
   25 | 0.000141 | 160640/160716 | 0.4484 | 5.3143 | 16.07 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99785, 'recall_grapheme': 0.996825, 'recall_vowel': 0.99839, 'recall_consonant': 0.999359, 'recall_word': 0.996567, 'acc_grapheme': 0.996785, 'acc_vowel': 0.99828, 'acc_consonant': 0.999227, 'acc_word': 0.996536, 'loss_grapheme': 0.01558, 'loss_vowel': 0.007513, 'loss_consonant': 0.003653, 'loss_word': 0.016288}
SWA>>>:


100%|██████████| 1255/1255 [08:33<00:00,  2.27it/s]



val: {'recall': 0.998524, 'recall_grapheme': 0.997859, 'recall_vowel': 0.998708, 'recall_consonant': 0.999672, 'recall_word': 0.997566, 'acc_grapheme': 0.997782, 'acc_vowel': 0.998779, 'acc_consonant': 0.999502, 'acc_word': 0.997533, 'loss_grapheme': 0.011266, 'loss_vowel': 0.005879, 'loss_consonant': 0.002248, 'loss_word': 0.01208}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   26 | 0.000131 | 160640/160716 | 5.4508 | 5.1905 | 16.10 ||
val: {'recall': 0.99799, 'recall_grapheme': 0.997105, 'recall_vowel': 0.998451, 'recall_consonant': 0.9993, 'recall_word': 0.996095, 'acc_grapheme': 0.99681, 'acc_vowel': 0.99843, 'acc_consonant': 0.998928, 'acc_word': 0.996137, 'loss_grapheme': 0.016655, 'loss_vowel': 0.007651, 'loss_consonant': 0.004298, 'loss_word': 0.018038}
   27 | 0.000122 | 160640/160716 | 12.4592 | 5.3977 | 16.09 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998046, 'recall_grapheme': 0.997158, 'recall_vowel': 0.998521, 'recall_consonant': 0.999349, 'recall_word': 0.996664, 'acc_grapheme': 0.996935, 'acc_vowel': 0.998505, 'acc_consonant': 0.999128, 'acc_word': 0.996635, 'loss_grapheme': 0.017051, 'loss_vowel': 0.00898, 'loss_consonant': 0.00475, 'loss_word': 0.017172}
SWA>>>:


100%|██████████| 1255/1255 [08:30<00:00,  2.34it/s]



val: {'recall': 0.99857, 'recall_grapheme': 0.997908, 'recall_vowel': 0.998794, 'recall_consonant': 0.999672, 'recall_word': 0.997609, 'acc_grapheme': 0.997782, 'acc_vowel': 0.998804, 'acc_consonant': 0.999502, 'acc_word': 0.997582, 'loss_grapheme': 0.01127, 'loss_vowel': 0.005837, 'loss_consonant': 0.00223, 'loss_word': 0.012033}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   28 | 0.000113 | 160640/160716 | 0.8240 | 5.3540 | 16.08 ||
val: {'recall': 0.997964, 'recall_grapheme': 0.996908, 'recall_vowel': 0.998607, 'recall_consonant': 0.999433, 'recall_word': 0.996375, 'acc_grapheme': 0.996685, 'acc_vowel': 0.998405, 'acc_consonant': 0.999153, 'acc_word': 0.996411, 'loss_grapheme': 0.01628, 'loss_vowel': 0.00843, 'loss_consonant': 0.003598, 'loss_word': 0.017308}
   29 | 0.000104 | 160640/160716 | 3.5427 | 5.1854 | 16.11 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997792, 'recall_grapheme': 0.996707, 'recall_vowel': 0.998275, 'recall_consonant': 0.999477, 'recall_word': 0.99593, 'acc_grapheme': 0.996685, 'acc_vowel': 0.998181, 'acc_consonant': 0.999227, 'acc_word': 0.996012, 'loss_grapheme': 0.016549, 'loss_vowel': 0.00877, 'loss_consonant': 0.003773, 'loss_word': 0.017984}
SWA>>>:


100%|██████████| 1255/1255 [08:31<00:00,  2.36it/s]



val: {'recall': 0.99855, 'recall_grapheme': 0.997863, 'recall_vowel': 0.998806, 'recall_consonant': 0.999666, 'recall_word': 0.997592, 'acc_grapheme': 0.997782, 'acc_vowel': 0.998804, 'acc_consonant': 0.999477, 'acc_word': 0.997558, 'loss_grapheme': 0.011258, 'loss_vowel': 0.005915, 'loss_consonant': 0.002205, 'loss_word': 0.012035}
   30 | 0.000095 | 160640/160716 | 0.3515 | 5.2043 | 16.12 ||
val: {'recall': 0.997969, 'recall_grapheme': 0.996945, 'recall_vowel': 0.99861, 'recall_consonant': 0.999377, 'recall_word': 0.996506, 'acc_grapheme': 0.996959, 'acc_vowel': 0.998505, 'acc_consonant': 0.999128, 'acc_word': 0.996561, 'loss_grapheme': 0.015022, 'loss_vowel': 0.007563, 'loss_consonant': 0.003652, 'loss_word': 0.01587}
   31 | 0.000086 | 160640/160716 | 8.8605 | 5.0298 | 16.13 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997954, 'recall_grapheme': 0.996983, 'recall_vowel': 0.998349, 'recall_consonant': 0.999502, 'recall_word': 0.996655, 'acc_grapheme': 0.996959, 'acc_vowel': 0.99838, 'acc_consonant': 0.999352, 'acc_word': 0.99666, 'loss_grapheme': 0.015801, 'loss_vowel': 0.00839, 'loss_consonant': 0.003174, 'loss_word': 0.01699}
SWA>>>:


100%|██████████| 1255/1255 [08:30<00:00,  2.34it/s]



val: {'recall': 0.998545, 'recall_grapheme': 0.997881, 'recall_vowel': 0.998721, 'recall_consonant': 0.9997, 'recall_word': 0.997534, 'acc_grapheme': 0.997757, 'acc_vowel': 0.998779, 'acc_consonant': 0.999502, 'acc_word': 0.997508, 'loss_grapheme': 0.011303, 'loss_vowel': 0.006018, 'loss_consonant': 0.002173, 'loss_word': 0.012114}
   32 | 0.000078 | 160640/160716 | 0.6722 | 5.0400 | 16.11 ||
val: {'recall': 0.997847, 'recall_grapheme': 0.996744, 'recall_vowel': 0.99848, 'recall_consonant': 0.99942, 'recall_word': 0.996333, 'acc_grapheme': 0.996635, 'acc_vowel': 0.998554, 'acc_consonant': 0.999128, 'acc_word': 0.996361, 'loss_grapheme': 0.016663, 'loss_vowel': 0.008271, 'loss_consonant': 0.003911, 'loss_word': 0.017654}
   33 | 0.000070 | 160640/160716 | 6.9233 | 5.1518 | 16.11 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997836, 'recall_grapheme': 0.996742, 'recall_vowel': 0.998477, 'recall_consonant': 0.999381, 'recall_word': 0.996723, 'acc_grapheme': 0.996935, 'acc_vowel': 0.998405, 'acc_consonant': 0.999178, 'acc_word': 0.996735, 'loss_grapheme': 0.015752, 'loss_vowel': 0.008143, 'loss_consonant': 0.003521, 'loss_word': 0.017075}
SWA>>>:


100%|██████████| 1255/1255 [08:42<00:00,  2.27it/s]



val: {'recall': 0.998566, 'recall_grapheme': 0.997872, 'recall_vowel': 0.998819, 'recall_consonant': 0.9997, 'recall_word': 0.99758, 'acc_grapheme': 0.997782, 'acc_vowel': 0.998829, 'acc_consonant': 0.999502, 'acc_word': 0.997558, 'loss_grapheme': 0.011263, 'loss_vowel': 0.006068, 'loss_consonant': 0.002155, 'loss_word': 0.012094}
   34 | 0.000062 | 160640/160716 | 0.3612 | 5.0283 | 16.13 ||
val: {'recall': 0.998127, 'recall_grapheme': 0.997118, 'recall_vowel': 0.998809, 'recall_consonant': 0.999463, 'recall_word': 0.996851, 'acc_grapheme': 0.997034, 'acc_vowel': 0.998654, 'acc_consonant': 0.999302, 'acc_word': 0.99686, 'loss_grapheme': 0.014864, 'loss_vowel': 0.007955, 'loss_consonant': 0.003343, 'loss_word': 0.015897}
   35 | 0.000054 | 160640/160716 | 7.2340 | 5.0855 | 16.11 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998223, 'recall_grapheme': 0.99738, 'recall_vowel': 0.998863, 'recall_consonant': 0.999269, 'recall_word': 0.996903, 'acc_grapheme': 0.997234, 'acc_vowel': 0.998629, 'acc_consonant': 0.999153, 'acc_word': 0.996885, 'loss_grapheme': 0.014779, 'loss_vowel': 0.007705, 'loss_consonant': 0.003196, 'loss_word': 0.015804}
SWA>>>:


100%|██████████| 1255/1255 [08:40<00:00,  2.31it/s]



val: {'recall': 0.998538, 'recall_grapheme': 0.997818, 'recall_vowel': 0.998819, 'recall_consonant': 0.9997, 'recall_word': 0.997602, 'acc_grapheme': 0.997757, 'acc_vowel': 0.998829, 'acc_consonant': 0.999502, 'acc_word': 0.997582, 'loss_grapheme': 0.011209, 'loss_vowel': 0.006007, 'loss_consonant': 0.002147, 'loss_word': 0.012036}
   36 | 0.000047 | 160640/160716 | 6.2500 | 5.3409 | 16.12 ||
val: {'recall': 0.997945, 'recall_grapheme': 0.997061, 'recall_vowel': 0.998397, 'recall_consonant': 0.999259, 'recall_word': 0.996747, 'acc_grapheme': 0.996984, 'acc_vowel': 0.998405, 'acc_consonant': 0.999128, 'acc_word': 0.996785, 'loss_grapheme': 0.015625, 'loss_vowel': 0.008049, 'loss_consonant': 0.003691, 'loss_word': 0.016892}
   37 | 0.000041 | 160640/160716 | 0.2279 | 5.2630 | 16.13 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998071, 'recall_grapheme': 0.997039, 'recall_vowel': 0.998573, 'recall_consonant': 0.999632, 'recall_word': 0.996803, 'acc_grapheme': 0.997009, 'acc_vowel': 0.998554, 'acc_consonant': 0.999327, 'acc_word': 0.996835, 'loss_grapheme': 0.015373, 'loss_vowel': 0.007592, 'loss_consonant': 0.003083, 'loss_word': 0.016246}
SWA>>>:


100%|██████████| 1255/1255 [08:35<00:00,  2.33it/s]



val: {'recall': 0.998539, 'recall_grapheme': 0.997805, 'recall_vowel': 0.998846, 'recall_consonant': 0.9997, 'recall_word': 0.997602, 'acc_grapheme': 0.997732, 'acc_vowel': 0.998854, 'acc_consonant': 0.999502, 'acc_word': 0.997582, 'loss_grapheme': 0.01124, 'loss_vowel': 0.00602, 'loss_consonant': 0.00216, 'loss_word': 0.012079}
   38 | 0.000034 | 160640/160716 | 4.1157 | 5.2583 | 16.08 ||
val: {'recall': 0.997863, 'recall_grapheme': 0.996857, 'recall_vowel': 0.998417, 'recall_consonant': 0.999318, 'recall_word': 0.996428, 'acc_grapheme': 0.99681, 'acc_vowel': 0.998405, 'acc_consonant': 0.999128, 'acc_word': 0.996436, 'loss_grapheme': 0.015862, 'loss_vowel': 0.008381, 'loss_consonant': 0.003228, 'loss_word': 0.017209}
   39 | 0.000029 | 160640/160716 | 3.4274 | 5.2515 | 16.14 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997918, 'recall_grapheme': 0.996776, 'recall_vowel': 0.998631, 'recall_consonant': 0.999489, 'recall_word': 0.996776, 'acc_grapheme': 0.996984, 'acc_vowel': 0.998579, 'acc_consonant': 0.999302, 'acc_word': 0.99681, 'loss_grapheme': 0.015416, 'loss_vowel': 0.007891, 'loss_consonant': 0.003029, 'loss_word': 0.016762}
SWA>>>:


100%|██████████| 1255/1255 [08:45<00:00,  2.35it/s]



val: {'recall': 0.998554, 'recall_grapheme': 0.997837, 'recall_vowel': 0.998875, 'recall_consonant': 0.999669, 'recall_word': 0.997595, 'acc_grapheme': 0.997782, 'acc_vowel': 0.998878, 'acc_consonant': 0.999477, 'acc_word': 0.997582, 'loss_grapheme': 0.011221, 'loss_vowel': 0.006036, 'loss_consonant': 0.002145, 'loss_word': 0.012089}
   40 | 0.000023 | 160640/160716 | 3.8719 | 5.0517 | 16.11 ||
val: {'recall': 0.998126, 'recall_grapheme': 0.997174, 'recall_vowel': 0.998672, 'recall_consonant': 0.999484, 'recall_word': 0.996761, 'acc_grapheme': 0.997109, 'acc_vowel': 0.99848, 'acc_consonant': 0.999277, 'acc_word': 0.99681, 'loss_grapheme': 0.015341, 'loss_vowel': 0.007959, 'loss_consonant': 0.002954, 'loss_word': 0.016735}
   41 | 0.000019 | 160640/160716 | 4.3443 | 4.9051 | 16.14 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998053, 'recall_grapheme': 0.996967, 'recall_vowel': 0.998797, 'recall_consonant': 0.999479, 'recall_word': 0.996773, 'acc_grapheme': 0.997034, 'acc_vowel': 0.998505, 'acc_consonant': 0.999252, 'acc_word': 0.996835, 'loss_grapheme': 0.016114, 'loss_vowel': 0.008419, 'loss_consonant': 0.003258, 'loss_word': 0.017378}
SWA>>>:


100%|██████████| 1255/1255 [08:41<00:00,  2.31it/s]



val: {'recall': 0.998558, 'recall_grapheme': 0.997837, 'recall_vowel': 0.998891, 'recall_consonant': 0.999669, 'recall_word': 0.9977, 'acc_grapheme': 0.997782, 'acc_vowel': 0.998903, 'acc_consonant': 0.999477, 'acc_word': 0.997682, 'loss_grapheme': 0.011156, 'loss_vowel': 0.006048, 'loss_consonant': 0.002149, 'loss_word': 0.012063}
   42 | 0.000014 | 160640/160716 | 0.3771 | 4.8242 | 16.14 ||
val: {'recall': 0.998467, 'recall_grapheme': 0.99772, 'recall_vowel': 0.998871, 'recall_consonant': 0.999558, 'recall_word': 0.99743, 'acc_grapheme': 0.997582, 'acc_vowel': 0.998704, 'acc_consonant': 0.999477, 'acc_word': 0.997458, 'loss_grapheme': 0.012791, 'loss_vowel': 0.006768, 'loss_consonant': 0.002529, 'loss_word': 0.01347}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth
   43 | 0.000011 | 160640/160716 | 3.9523 | 4.9743 | 16.15 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998207, 'recall_grapheme': 0.997238, 'recall_vowel': 0.998828, 'recall_consonant': 0.999523, 'recall_word': 0.996918, 'acc_grapheme': 0.997159, 'acc_vowel': 0.998604, 'acc_consonant': 0.999327, 'acc_word': 0.996959, 'loss_grapheme': 0.014628, 'loss_vowel': 0.007673, 'loss_consonant': 0.002754, 'loss_word': 0.015494}
SWA>>>:


100%|██████████| 1255/1255 [08:40<00:00,  2.34it/s]



val: {'recall': 0.998568, 'recall_grapheme': 0.997876, 'recall_vowel': 0.998848, 'recall_consonant': 0.999674, 'recall_word': 0.997676, 'acc_grapheme': 0.997832, 'acc_vowel': 0.998829, 'acc_consonant': 0.999502, 'acc_word': 0.997657, 'loss_grapheme': 0.011135, 'loss_vowel': 0.006058, 'loss_consonant': 0.002144, 'loss_word': 0.012055}
   44 | 0.000007 | 160640/160716 | 0.5697 | 4.9785 | 16.18 ||
val: {'recall': 0.998177, 'recall_grapheme': 0.997137, 'recall_vowel': 0.998833, 'recall_consonant': 0.999602, 'recall_word': 0.99685, 'acc_grapheme': 0.997134, 'acc_vowel': 0.998704, 'acc_consonant': 0.999427, 'acc_word': 0.996885, 'loss_grapheme': 0.014332, 'loss_vowel': 0.007566, 'loss_consonant': 0.002667, 'loss_word': 0.015301}
   45 | 0.000005 | 160640/160716 | 0.0739 | 4.7403 | 16.22 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998363, 'recall_grapheme': 0.997425, 'recall_vowel': 0.998947, 'recall_consonant': 0.999653, 'recall_word': 0.997166, 'acc_grapheme': 0.997383, 'acc_vowel': 0.998804, 'acc_consonant': 0.999526, 'acc_word': 0.997209, 'loss_grapheme': 0.012887, 'loss_vowel': 0.006701, 'loss_consonant': 0.002318, 'loss_word': 0.013874}
SWA>>>:


100%|██████████| 1255/1255 [08:39<00:00,  2.27it/s]



val: {'recall': 0.998582, 'recall_grapheme': 0.997895, 'recall_vowel': 0.998864, 'recall_consonant': 0.999674, 'recall_word': 0.997676, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998854, 'acc_consonant': 0.999502, 'acc_word': 0.997657, 'loss_grapheme': 0.011109, 'loss_vowel': 0.00605, 'loss_consonant': 0.002146, 'loss_word': 0.012047}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   46 | 0.000003 | 160640/160716 | 0.0599 | 4.8403 | 16.15 ||
val: {'recall': 0.998356, 'recall_grapheme': 0.997452, 'recall_vowel': 0.998853, 'recall_consonant': 0.999666, 'recall_word': 0.997137, 'acc_grapheme': 0.997333, 'acc_vowel': 0.998679, 'acc_consonant': 0.999477, 'acc_word': 0.997184, 'loss_grapheme': 0.013702, 'loss_vowel': 0.006931, 'loss_consonant': 0.002468, 'loss_word': 0.014774}
   47 | 0.000001 | 160640/160716 | 0.4723 | 5.0340 | 16.13 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998401, 'recall_grapheme': 0.997593, 'recall_vowel': 0.998776, 'recall_consonant': 0.999641, 'recall_word': 0.997085, 'acc_grapheme': 0.997508, 'acc_vowel': 0.998654, 'acc_consonant': 0.999477, 'acc_word': 0.997134, 'loss_grapheme': 0.013401, 'loss_vowel': 0.007086, 'loss_consonant': 0.002666, 'loss_word': 0.014299}
SWA>>>:


100%|██████████| 1255/1255 [08:36<00:00,  2.38it/s]



val: {'recall': 0.998585, 'recall_grapheme': 0.997886, 'recall_vowel': 0.998864, 'recall_consonant': 0.999705, 'recall_word': 0.997799, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998854, 'acc_consonant': 0.999526, 'acc_word': 0.997782, 'loss_grapheme': 0.011071, 'loss_vowel': 0.006032, 'loss_consonant': 0.002132, 'loss_word': 0.012017}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   48 | 0.000000 | 160640/160716 | 0.5877 | 5.2533 | 16.12 ||
val: {'recall': 0.998292, 'recall_grapheme': 0.997347, 'recall_vowel': 0.998857, 'recall_consonant': 0.999618, 'recall_word': 0.997044, 'acc_grapheme': 0.997234, 'acc_vowel': 0.998654, 'acc_consonant': 0.999377, 'acc_word': 0.997084, 'loss_grapheme': 0.013832, 'loss_vowel': 0.007252, 'loss_consonant': 0.002743, 'loss_word': 0.01471}
   49 | 0.000000 | 160640/160716 | 0.0397 | 5.1841 | 16.10 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998167, 'recall_grapheme': 0.99721, 'recall_vowel': 0.998744, 'recall_consonant': 0.999507, 'recall_word': 0.996852, 'acc_grapheme': 0.997134, 'acc_vowel': 0.998554, 'acc_consonant': 0.999352, 'acc_word': 0.996885, 'loss_grapheme': 0.014699, 'loss_vowel': 0.007775, 'loss_consonant': 0.002981, 'loss_word': 0.015285}
SWA>>>:


100%|██████████| 1255/1255 [08:36<00:00,  2.30it/s]



val: {'recall': 0.99859, 'recall_grapheme': 0.997891, 'recall_vowel': 0.998875, 'recall_consonant': 0.999705, 'recall_word': 0.997727, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998878, 'acc_consonant': 0.999526, 'acc_word': 0.997707, 'loss_grapheme': 0.011106, 'loss_vowel': 0.006066, 'loss_consonant': 0.002153, 'loss_word': 0.012075}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
CYCLE: 2
    8 | 0.000277 | 160640/160716 | 10.1806 | 5.2999 | 16.13 |
val: {'recall': 0.997233, 'recall_grapheme': 0.995683, 'recall_vowel': 0.998407, 'recall_consonant': 0.99916, 'recall_word': 0.995581, 'acc_grapheme': 0.995639, 'acc_vowel': 0.998255, 'acc_consonant': 0.999003, 'acc_word': 0.995564, 'loss_grapheme': 0.018449, 'loss_vowel': 0.008664, 'loss_consonant': 0.003902, 'loss_word': 0.018997}
    9 | 0.000271 | 160640/160716 | 7.4838 | 5.4551 | 16.14 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997595, 'recall_grapheme': 0.996199, 'recall_vowel': 0.998607, 'recall_consonant': 0.999375, 'recall_word': 0.996147, 'acc_grapheme': 0.996411, 'acc_vowel': 0.998405, 'acc_consonant': 0.998928, 'acc_word': 0.996137, 'loss_grapheme': 0.017194, 'loss_vowel': 0.007709, 'loss_consonant': 0.005101, 'loss_word': 0.017654}
SWA>>>:


100%|██████████| 1255/1255 [08:40<00:00,  2.29it/s]



val: {'recall': 0.998589, 'recall_grapheme': 0.997888, 'recall_vowel': 0.998875, 'recall_consonant': 0.999705, 'recall_word': 0.997811, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998878, 'acc_consonant': 0.999526, 'acc_word': 0.997782, 'loss_grapheme': 0.011042, 'loss_vowel': 0.006001, 'loss_consonant': 0.002163, 'loss_word': 0.012028}
   10 | 0.000266 | 160640/160716 | 1.4577 | 5.2489 | 16.14 ||
val: {'recall': 0.997704, 'recall_grapheme': 0.996388, 'recall_vowel': 0.998795, 'recall_consonant': 0.999247, 'recall_word': 0.996381, 'acc_grapheme': 0.99671, 'acc_vowel': 0.998754, 'acc_consonant': 0.998928, 'acc_word': 0.996336, 'loss_grapheme': 0.019258, 'loss_vowel': 0.009315, 'loss_consonant': 0.006259, 'loss_word': 0.018882}
   11 | 0.000263 | 063872/160716 | 12.7251 | 5.5997 | 6.42 |

KeyboardInterrupt: 

In [None]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa, exist: False

val: {'recall': 0.998018, 'recall_grapheme': 0.997036, 'recall_vowel': 0.998639, 'recall_consonant': 0.999361, 'recall_word': 0.996448, 'acc_grapheme': 0.99691, 'acc_vowel': 0.998629, 'acc_consonant': 0.999202, 'acc_word': 0.996461, 'loss_grapheme': 0.014569, 'loss_vowel': 0.006717, 'loss_consonant': 0.003164, 'loss_word': 0.015258}
CYCLE: 1
    0 | 0.000150 | 160640/160716 | 12.2697 | 5.3472 | 14.08 |
val: {'recall': 0.998146, 'recall_grapheme': 0.997216, 'recall_vowel': 0.998845, 'recall_consonant': 0.999307, 'recall_word': 0.996611, 'acc_grapheme': 0.996959, 'acc_vowel': 0.998579, 'acc_consonant': 0.999103, 'acc_word': 0.996685, 'loss_grapheme': 0.01638, 'loss_vowel': 0.007743, 'loss_consonant': 0.004658, 'loss_word': 0.016503}
###>>>>> saved ./m

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99752, 'recall_grapheme': 0.996393, 'recall_vowel': 0.998183, 'recall_consonant': 0.99911, 'recall_word': 0.995869, 'acc_grapheme': 0.996187, 'acc_vowel': 0.99823, 'acc_consonant': 0.998878, 'acc_word': 0.995838, 'loss_grapheme': 0.021373, 'loss_vowel': 0.012239, 'loss_consonant': 0.007318, 'loss_word': 0.020206}
SWA>>>:


100%|██████████| 1255/1255 [07:33<00:00,  2.68it/s]



val: {'recall': 0.997533, 'recall_grapheme': 0.996394, 'recall_vowel': 0.998198, 'recall_consonant': 0.999147, 'recall_word': 0.996213, 'acc_grapheme': 0.996361, 'acc_vowel': 0.998255, 'acc_consonant': 0.998903, 'acc_word': 0.996112, 'loss_grapheme': 0.015758, 'loss_vowel': 0.008725, 'loss_consonant': 0.00417, 'loss_word': 0.017115}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   10 | 0.000266 | 160640/160716 | 0.7321 | 5.7232 | 14.63 ||
val: {'recall': 0.996911, 'recall_grapheme': 0.99547, 'recall_vowel': 0.998028, 'recall_consonant': 0.998674, 'recall_word': 0.995199, 'acc_grapheme': 0.995514, 'acc_vowel': 0.998081, 'acc_consonant': 0.998604, 'acc_word': 0.99509, 'loss_grapheme': 0.021331, 'loss_vowel': 0.010744, 'loss_consonant': 0.006966, 'loss_word': 0.022242}
   11 | 0.000259 | 160640/160716 | 5.5887 | 5.7066 | 14.59 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997106, 'recall_grapheme': 0.995695, 'recall_vowel': 0.998054, 'recall_consonant': 0.998979, 'recall_word': 0.995321, 'acc_grapheme': 0.995813, 'acc_vowel': 0.998131, 'acc_consonant': 0.998804, 'acc_word': 0.995265, 'loss_grapheme': 0.021846, 'loss_vowel': 0.012191, 'loss_consonant': 0.007208, 'loss_word': 0.021108}
SWA>>>:


 80%|███████▉  | 1001/1255 [06:00<01:28,  2.87it/s]


val: {'recall': 0.998053, 'recall_grapheme': 0.997278, 'recall_vowel': 0.998432, 'recall_consonant': 0.999222, 'recall_word': 0.997079, 'acc_grapheme': 0.997358, 'acc_vowel': 0.99848, 'acc_consonant': 0.999053, 'acc_word': 0.997059, 'loss_grapheme': 0.012955, 'loss_vowel': 0.007658, 'loss_consonant': 0.00334, 'loss_word': 0.014221}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   12 | 0.000253 | 160640/160716 | 7.0308 | 5.5489 | 14.65 ||
val: {'recall': 0.997123, 'recall_grapheme': 0.995739, 'recall_vowel': 0.998187, 'recall_consonant': 0.998829, 'recall_word': 0.994984, 'acc_grapheme': 0.995614, 'acc_vowel': 0.998081, 'acc_consonant': 0.998654, 'acc_word': 0.99504, 'loss_grapheme': 0.02126, 'loss_vowel': 0.010248, 'loss_consonant': 0.006952, 'loss_word': 0.021844}
   13 | 0.000246 | 160640/160716 | 0.3061 | 5.6740 | 14.62 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997407, 'recall_grapheme': 0.996143, 'recall_vowel': 0.998339, 'recall_consonant': 0.999001, 'recall_word': 0.995697, 'acc_grapheme': 0.996311, 'acc_vowel': 0.99833, 'acc_consonant': 0.998704, 'acc_word': 0.995738, 'loss_grapheme': 0.018386, 'loss_vowel': 0.008741, 'loss_consonant': 0.006187, 'loss_word': 0.019451}
SWA>>>:


 39%|███▊      | 485/1255 [02:56<04:32,  2.83it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



   17 | 0.000214 | 160640/160716 | 0.6688 | 5.4221 | 14.65 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997572, 'recall_grapheme': 0.996537, 'recall_vowel': 0.998222, 'recall_consonant': 0.998991, 'recall_word': 0.996372, 'acc_grapheme': 0.996536, 'acc_vowel': 0.998206, 'acc_consonant': 0.998903, 'acc_word': 0.996336, 'loss_grapheme': 0.016323, 'loss_vowel': 0.008043, 'loss_consonant': 0.003894, 'loss_word': 0.016926}
SWA>>>:


100%|██████████| 1255/1255 [07:34<00:00,  2.74it/s]



val: {'recall': 0.998281, 'recall_grapheme': 0.997461, 'recall_vowel': 0.998732, 'recall_consonant': 0.999472, 'recall_word': 0.997331, 'acc_grapheme': 0.997757, 'acc_vowel': 0.998704, 'acc_consonant': 0.999327, 'acc_word': 0.997333, 'loss_grapheme': 0.011914, 'loss_vowel': 0.006443, 'loss_consonant': 0.003048, 'loss_word': 0.012786}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   18 | 0.000207 | 120064/160716 | 5.3635 | 5.5932 | 10.97 ||

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|██████████| 1255/1255 [07:31<00:00,  2.74it/s]



val: {'recall': 0.99828, 'recall_grapheme': 0.997523, 'recall_vowel': 0.998581, 'recall_consonant': 0.999493, 'recall_word': 0.997424, 'acc_grapheme': 0.997657, 'acc_vowel': 0.998729, 'acc_consonant': 0.999302, 'acc_word': 0.997383, 'loss_grapheme': 0.011711, 'loss_vowel': 0.006383, 'loss_consonant': 0.003019, 'loss_word': 0.012573}
   22 | 0.000169 | 160640/160716 | 11.8752 | 5.4375 | 14.66 |
val: {'recall': 0.997365, 'recall_grapheme': 0.996354, 'recall_vowel': 0.998112, 'recall_consonant': 0.998639, 'recall_word': 0.996268, 'acc_grapheme': 0.996386, 'acc_vowel': 0.998255, 'acc_consonant': 0.998928, 'acc_word': 0.996262, 'loss_grapheme': 0.01757, 'loss_vowel': 0.008387, 'loss_consonant': 0.004676, 'loss_word': 0.017935}
   23 | 0.000159 | 160640/160716 | 0.4429 | 5.1324 | 14.69 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997946, 'recall_grapheme': 0.997161, 'recall_vowel': 0.998156, 'recall_consonant': 0.999304, 'recall_word': 0.996665, 'acc_grapheme': 0.997009, 'acc_vowel': 0.99838, 'acc_consonant': 0.999078, 'acc_word': 0.99666, 'loss_grapheme': 0.014482, 'loss_vowel': 0.007627, 'loss_consonant': 0.003492, 'loss_word': 0.015458}
SWA>>>:


 14%|█▍        | 178/1255 [01:06<06:24,  2.80it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




val: {'recall': 0.997453, 'recall_grapheme': 0.99633, 'recall_vowel': 0.998296, 'recall_consonant': 0.998854, 'recall_word': 0.996198, 'acc_grapheme': 0.996586, 'acc_vowel': 0.99843, 'acc_consonant': 0.998779, 'acc_word': 0.996212, 'loss_grapheme': 0.016974, 'loss_vowel': 0.008299, 'loss_consonant': 0.005154, 'loss_word': 0.018682}
   25 | 0.000141 | 160640/160716 | 7.9934 | 5.3348 | 14.64 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997439, 'recall_grapheme': 0.9963, 'recall_vowel': 0.99826, 'recall_consonant': 0.998898, 'recall_word': 0.996202, 'acc_grapheme': 0.996486, 'acc_vowel': 0.99823, 'acc_consonant': 0.998829, 'acc_word': 0.996212, 'loss_grapheme': 0.016073, 'loss_vowel': 0.008319, 'loss_consonant': 0.004999, 'loss_word': 0.018246}
SWA>>>:


100%|██████████| 1255/1255 [07:33<00:00,  2.81it/s]



val: {'recall': 0.998446, 'recall_grapheme': 0.997724, 'recall_vowel': 0.998765, 'recall_consonant': 0.999571, 'recall_word': 0.99756, 'acc_grapheme': 0.997782, 'acc_vowel': 0.998804, 'acc_consonant': 0.999402, 'acc_word': 0.997533, 'loss_grapheme': 0.011375, 'loss_vowel': 0.00623, 'loss_consonant': 0.002964, 'loss_word': 0.012269}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   26 | 0.000131 | 160640/160716 | 0.5803 | 5.3239 | 14.67 ||
val: {'recall': 0.997887, 'recall_grapheme': 0.996917, 'recall_vowel': 0.998516, 'recall_consonant': 0.999198, 'recall_word': 0.996382, 'acc_grapheme': 0.99681, 'acc_vowel': 0.99848, 'acc_consonant': 0.998953, 'acc_word': 0.996361, 'loss_grapheme': 0.016106, 'loss_vowel': 0.00807, 'loss_consonant': 0.004505, 'loss_word': 0.017375}
   27 | 0.000122 | 160640/160716 | 4.1229 | 5.4939 | 14.65 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997739, 'recall_grapheme': 0.996677, 'recall_vowel': 0.998346, 'recall_consonant': 0.999254, 'recall_word': 0.996145, 'acc_grapheme': 0.996586, 'acc_vowel': 0.99848, 'acc_consonant': 0.999103, 'acc_word': 0.996187, 'loss_grapheme': 0.01536, 'loss_vowel': 0.007826, 'loss_consonant': 0.004286, 'loss_word': 0.016651}
SWA>>>:


100%|██████████| 1255/1255 [07:37<00:00,  2.85it/s]



val: {'recall': 0.998429, 'recall_grapheme': 0.997778, 'recall_vowel': 0.998595, 'recall_consonant': 0.999566, 'recall_word': 0.99761, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998754, 'acc_consonant': 0.999377, 'acc_word': 0.997582, 'loss_grapheme': 0.01121, 'loss_vowel': 0.006232, 'loss_consonant': 0.002917, 'loss_word': 0.012151}
   28 | 0.000113 | 160640/160716 | 0.2682 | 5.3714 | 14.60 ||
val: {'recall': 0.997801, 'recall_grapheme': 0.996991, 'recall_vowel': 0.998102, 'recall_consonant': 0.99912, 'recall_word': 0.996217, 'acc_grapheme': 0.99681, 'acc_vowel': 0.998405, 'acc_consonant': 0.998953, 'acc_word': 0.996187, 'loss_grapheme': 0.015583, 'loss_vowel': 0.007711, 'loss_consonant': 0.004309, 'loss_word': 0.016699}
   29 | 0.000104 | 160640/160716 | 6.5037 | 5.2576 | 14.63 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997678, 'recall_grapheme': 0.996821, 'recall_vowel': 0.998133, 'recall_consonant': 0.998937, 'recall_word': 0.996014, 'acc_grapheme': 0.996461, 'acc_vowel': 0.998554, 'acc_consonant': 0.998854, 'acc_word': 0.996087, 'loss_grapheme': 0.016655, 'loss_vowel': 0.007869, 'loss_consonant': 0.004726, 'loss_word': 0.018406}
SWA>>>:


100%|██████████| 1255/1255 [07:34<00:00,  2.89it/s]



val: {'recall': 0.998463, 'recall_grapheme': 0.997819, 'recall_vowel': 0.998614, 'recall_consonant': 0.999599, 'recall_word': 0.997682, 'acc_grapheme': 0.997906, 'acc_vowel': 0.998779, 'acc_consonant': 0.999402, 'acc_word': 0.997657, 'loss_grapheme': 0.011152, 'loss_vowel': 0.006184, 'loss_consonant': 0.002847, 'loss_word': 0.012094}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   30 | 0.000095 | 160640/160716 | 12.3763 | 5.2593 | 14.65 |
val: {'recall': 0.997905, 'recall_grapheme': 0.996998, 'recall_vowel': 0.998412, 'recall_consonant': 0.99921, 'recall_word': 0.996671, 'acc_grapheme': 0.996885, 'acc_vowel': 0.998255, 'acc_consonant': 0.999128, 'acc_word': 0.99666, 'loss_grapheme': 0.015048, 'loss_vowel': 0.008466, 'loss_consonant': 0.003443, 'loss_word': 0.016293}
   31 | 0.000086 | 160640/160716 | 0.7063 | 4.9486 | 14.69 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.998127, 'recall_grapheme': 0.99735, 'recall_vowel': 0.998582, 'recall_consonant': 0.999226, 'recall_word': 0.996857, 'acc_grapheme': 0.99686, 'acc_vowel': 0.998654, 'acc_consonant': 0.999078, 'acc_word': 0.99686, 'loss_grapheme': 0.013294, 'loss_vowel': 0.006519, 'loss_consonant': 0.003173, 'loss_word': 0.014166}
SWA>>>:


100%|██████████| 1255/1255 [07:32<00:00,  2.75it/s]



val: {'recall': 0.998461, 'recall_grapheme': 0.997829, 'recall_vowel': 0.998614, 'recall_consonant': 0.999571, 'recall_word': 0.997779, 'acc_grapheme': 0.997906, 'acc_vowel': 0.998779, 'acc_consonant': 0.999402, 'acc_word': 0.997757, 'loss_grapheme': 0.011028, 'loss_vowel': 0.006144, 'loss_consonant': 0.002748, 'loss_word': 0.011955}
   32 | 0.000078 | 160640/160716 | 13.7737 | 5.3346 | 14.64 |
val: {'recall': 0.997574, 'recall_grapheme': 0.996358, 'recall_vowel': 0.998473, 'recall_consonant': 0.999109, 'recall_word': 0.996272, 'acc_grapheme': 0.996361, 'acc_vowel': 0.998505, 'acc_consonant': 0.998928, 'acc_word': 0.996262, 'loss_grapheme': 0.016124, 'loss_vowel': 0.008289, 'loss_consonant': 0.003885, 'loss_word': 0.016889}
   33 | 0.000070 | 160640/160716 | 0.3994 | 5.2867 | 14.67 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997951, 'recall_grapheme': 0.997003, 'recall_vowel': 0.9985, 'recall_consonant': 0.999297, 'recall_word': 0.99661, 'acc_grapheme': 0.996835, 'acc_vowel': 0.99848, 'acc_consonant': 0.999153, 'acc_word': 0.99666, 'loss_grapheme': 0.014162, 'loss_vowel': 0.00712, 'loss_consonant': 0.003515, 'loss_word': 0.0152}
SWA>>>:


100%|██████████| 1255/1255 [07:34<00:00,  2.83it/s]



val: {'recall': 0.99847, 'recall_grapheme': 0.997859, 'recall_vowel': 0.998592, 'recall_consonant': 0.999571, 'recall_word': 0.997705, 'acc_grapheme': 0.997956, 'acc_vowel': 0.998779, 'acc_consonant': 0.999402, 'acc_word': 0.997682, 'loss_grapheme': 0.011005, 'loss_vowel': 0.006131, 'loss_consonant': 0.002715, 'loss_word': 0.011928}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   34 | 0.000062 | 160640/160716 | 7.8940 | 5.4673 | 14.59 ||
val: {'recall': 0.99791, 'recall_grapheme': 0.997038, 'recall_vowel': 0.998356, 'recall_consonant': 0.99921, 'recall_word': 0.996355, 'acc_grapheme': 0.99681, 'acc_vowel': 0.99848, 'acc_consonant': 0.999003, 'acc_word': 0.996361, 'loss_grapheme': 0.014869, 'loss_vowel': 0.007765, 'loss_consonant': 0.004008, 'loss_word': 0.016383}
   35 | 0.000054 | 160640/160716 | 9.0463 | 5.1709 | 14.66 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997952, 'recall_grapheme': 0.997056, 'recall_vowel': 0.998498, 'recall_consonant': 0.999199, 'recall_word': 0.996643, 'acc_grapheme': 0.996885, 'acc_vowel': 0.998629, 'acc_consonant': 0.998978, 'acc_word': 0.996611, 'loss_grapheme': 0.015582, 'loss_vowel': 0.007655, 'loss_consonant': 0.004191, 'loss_word': 0.016741}
SWA>>>:


100%|██████████| 1255/1255 [07:36<00:00,  2.72it/s]



val: {'recall': 0.998468, 'recall_grapheme': 0.997849, 'recall_vowel': 0.998602, 'recall_consonant': 0.999571, 'recall_word': 0.997609, 'acc_grapheme': 0.997931, 'acc_vowel': 0.998779, 'acc_consonant': 0.999402, 'acc_word': 0.997582, 'loss_grapheme': 0.011009, 'loss_vowel': 0.006135, 'loss_consonant': 0.002685, 'loss_word': 0.01194}
   36 | 0.000047 | 160640/160716 | 7.2906 | 5.5424 | 14.61 ||
val: {'recall': 0.997965, 'recall_grapheme': 0.99708, 'recall_vowel': 0.998521, 'recall_consonant': 0.999179, 'recall_word': 0.996669, 'acc_grapheme': 0.996885, 'acc_vowel': 0.998654, 'acc_consonant': 0.998978, 'acc_word': 0.996685, 'loss_grapheme': 0.013918, 'loss_vowel': 0.006891, 'loss_consonant': 0.00331, 'loss_word': 0.014968}
   37 | 0.000041 | 160640/160716 | 0.1630 | 5.1517 | 14.67 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.99808, 'recall_grapheme': 0.997258, 'recall_vowel': 0.998578, 'recall_consonant': 0.999227, 'recall_word': 0.99666, 'acc_grapheme': 0.997084, 'acc_vowel': 0.998654, 'acc_consonant': 0.999078, 'acc_word': 0.99671, 'loss_grapheme': 0.014431, 'loss_vowel': 0.007316, 'loss_consonant': 0.003423, 'loss_word': 0.016057}
SWA>>>:


100%|██████████| 1255/1255 [07:34<00:00,  2.83it/s]



val: {'recall': 0.998487, 'recall_grapheme': 0.997882, 'recall_vowel': 0.998613, 'recall_consonant': 0.999571, 'recall_word': 0.997656, 'acc_grapheme': 0.997882, 'acc_vowel': 0.998804, 'acc_consonant': 0.999402, 'acc_word': 0.997632, 'loss_grapheme': 0.011027, 'loss_vowel': 0.006146, 'loss_consonant': 0.002644, 'loss_word': 0.01195}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   38 | 0.000034 | 160640/160716 | 6.3363 | 5.1558 | 14.66 ||
val: {'recall': 0.997826, 'recall_grapheme': 0.996821, 'recall_vowel': 0.998474, 'recall_consonant': 0.999189, 'recall_word': 0.996387, 'acc_grapheme': 0.996611, 'acc_vowel': 0.99848, 'acc_consonant': 0.998928, 'acc_word': 0.996411, 'loss_grapheme': 0.015475, 'loss_vowel': 0.007582, 'loss_consonant': 0.003711, 'loss_word': 0.016895}
   39 | 0.000029 | 160640/160716 | 4.1006 | 5.2178 | 14.74 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997868, 'recall_grapheme': 0.996824, 'recall_vowel': 0.998614, 'recall_consonant': 0.999209, 'recall_word': 0.996654, 'acc_grapheme': 0.996885, 'acc_vowel': 0.998629, 'acc_consonant': 0.999003, 'acc_word': 0.996735, 'loss_grapheme': 0.014271, 'loss_vowel': 0.007163, 'loss_consonant': 0.003271, 'loss_word': 0.015567}
SWA>>>:


100%|██████████| 1255/1255 [07:33<00:00,  2.86it/s]



val: {'recall': 0.998477, 'recall_grapheme': 0.997876, 'recall_vowel': 0.998554, 'recall_consonant': 0.999602, 'recall_word': 0.997616, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998804, 'acc_consonant': 0.999427, 'acc_word': 0.997607, 'loss_grapheme': 0.011004, 'loss_vowel': 0.006136, 'loss_consonant': 0.002593, 'loss_word': 0.011916}
   40 | 0.000023 | 160640/160716 | 5.2387 | 5.0428 | 14.70 ||
val: {'recall': 0.997599, 'recall_grapheme': 0.996436, 'recall_vowel': 0.998378, 'recall_consonant': 0.999148, 'recall_word': 0.996277, 'acc_grapheme': 0.996386, 'acc_vowel': 0.998505, 'acc_consonant': 0.999003, 'acc_word': 0.996287, 'loss_grapheme': 0.015697, 'loss_vowel': 0.007876, 'loss_consonant': 0.004117, 'loss_word': 0.017314}
   41 | 0.000019 | 160640/160716 | 6.8449 | 4.8242 | 14.71 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997839, 'recall_grapheme': 0.996943, 'recall_vowel': 0.998297, 'recall_consonant': 0.999173, 'recall_word': 0.996488, 'acc_grapheme': 0.996835, 'acc_vowel': 0.998505, 'acc_consonant': 0.998978, 'acc_word': 0.996486, 'loss_grapheme': 0.015381, 'loss_vowel': 0.008155, 'loss_consonant': 0.004105, 'loss_word': 0.017035}
SWA>>>:


100%|██████████| 1255/1255 [07:35<00:00,  2.82it/s]



val: {'recall': 0.998499, 'recall_grapheme': 0.997926, 'recall_vowel': 0.998543, 'recall_consonant': 0.999602, 'recall_word': 0.997673, 'acc_grapheme': 0.997882, 'acc_vowel': 0.998779, 'acc_consonant': 0.999427, 'acc_word': 0.997657, 'loss_grapheme': 0.010988, 'loss_vowel': 0.006149, 'loss_consonant': 0.00255, 'loss_word': 0.011906}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa
   42 | 0.000016 | 111872/160716 | 0.1516 | 5.0661 | 10.24 ||

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



   43 | 0.000011 | 160640/160716 | 0.1909 | 5.0122 | 14.72 ||

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.9979, 'recall_grapheme': 0.996928, 'recall_vowel': 0.998495, 'recall_consonant': 0.999249, 'recall_word': 0.996696, 'acc_grapheme': 0.99691, 'acc_vowel': 0.998679, 'acc_consonant': 0.999078, 'acc_word': 0.99671, 'loss_grapheme': 0.014327, 'loss_vowel': 0.007493, 'loss_consonant': 0.003395, 'loss_word': 0.015556}
SWA>>>:


100%|██████████| 1255/1255 [07:38<00:00,  2.87it/s]



val: {'recall': 0.998445, 'recall_grapheme': 0.997875, 'recall_vowel': 0.998425, 'recall_consonant': 0.999602, 'recall_word': 0.997624, 'acc_grapheme': 0.997857, 'acc_vowel': 0.998779, 'acc_consonant': 0.999427, 'acc_word': 0.997607, 'loss_grapheme': 0.010971, 'loss_vowel': 0.006117, 'loss_consonant': 0.002521, 'loss_word': 0.011874}
   44 | 0.000007 | 160640/160716 | 0.2340 | 5.2228 | 14.72 ||
val: {'recall': 0.997859, 'recall_grapheme': 0.996893, 'recall_vowel': 0.99846, 'recall_consonant': 0.99919, 'recall_word': 0.996524, 'acc_grapheme': 0.996835, 'acc_vowel': 0.998679, 'acc_consonant': 0.999053, 'acc_word': 0.996561, 'loss_grapheme': 0.014782, 'loss_vowel': 0.007684, 'loss_consonant': 0.003487, 'loss_word': 0.016547}
   45 | 0.000005 | 160640/160716 | 5.6404 | 5.1102 | 14.66 ||
val: {'recall': 0.998172, 'recall_grapheme': 0.997521, 'recall_vowel': 0.99839, 'recall_consonant': 0.999254, 'recall_word': 0.99687, 'acc_grapheme': 0.997109, 'acc_vowel': 0.998654, 'acc_consonant': 0.999

  0%|          | 0/1255 [00:00<?, ?it/s]

###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth
SWA>>>:


100%|██████████| 1255/1255 [07:37<00:00,  2.76it/s]



val: {'recall': 0.998418, 'recall_grapheme': 0.997885, 'recall_vowel': 0.998298, 'recall_consonant': 0.999602, 'recall_word': 0.997674, 'acc_grapheme': 0.997882, 'acc_vowel': 0.998729, 'acc_consonant': 0.999427, 'acc_word': 0.997632, 'loss_grapheme': 0.010954, 'loss_vowel': 0.006123, 'loss_consonant': 0.0025, 'loss_word': 0.011891}
   46 | 0.000003 | 160640/160716 | 7.5181 | 5.0467 | 14.73 ||
val: {'recall': 0.998119, 'recall_grapheme': 0.997374, 'recall_vowel': 0.998458, 'recall_consonant': 0.999271, 'recall_word': 0.996728, 'acc_grapheme': 0.997034, 'acc_vowel': 0.998604, 'acc_consonant': 0.999178, 'acc_word': 0.996735, 'loss_grapheme': 0.014356, 'loss_vowel': 0.007716, 'loss_consonant': 0.003529, 'loss_word': 0.015543}
   47 | 0.000001 | 160640/160716 | 0.0369 | 5.3343 | 14.65 ||
val: {'recall': 0.998314, 'recall_grapheme': 0.997668, 'recall_vowel': 0.998427, 'recall_consonant': 0.99949, 'recall_word': 0.99722, 'acc_grapheme': 0.997333, 'acc_vowel': 0.998654, 'acc_consonant': 0.999

  0%|          | 0/1255 [00:00<?, ?it/s]

###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth
SWA>>>:


100%|██████████| 1255/1255 [07:37<00:00,  2.69it/s]



val: {'recall': 0.998427, 'recall_grapheme': 0.997885, 'recall_vowel': 0.998303, 'recall_consonant': 0.999635, 'recall_word': 0.997612, 'acc_grapheme': 0.997882, 'acc_vowel': 0.998729, 'acc_consonant': 0.999452, 'acc_word': 0.997582, 'loss_grapheme': 0.01097, 'loss_vowel': 0.006126, 'loss_consonant': 0.002464, 'loss_word': 0.01187}
   48 | 0.000000 | 160640/160716 | 2.1133 | 5.2048 | 14.68 ||
val: {'recall': 0.998108, 'recall_grapheme': 0.997342, 'recall_vowel': 0.998362, 'recall_consonant': 0.999386, 'recall_word': 0.996799, 'acc_grapheme': 0.997084, 'acc_vowel': 0.998629, 'acc_consonant': 0.999202, 'acc_word': 0.99686, 'loss_grapheme': 0.01433, 'loss_vowel': 0.007816, 'loss_consonant': 0.003238, 'loss_word': 0.015761}
   49 | 0.000000 | 160640/160716 | 13.2333 | 5.0441 | 14.73 |

  0%|          | 0/1255 [00:00<?, ?it/s]


val: {'recall': 0.997921, 'recall_grapheme': 0.99699, 'recall_vowel': 0.998526, 'recall_consonant': 0.999179, 'recall_word': 0.996649, 'acc_grapheme': 0.996835, 'acc_vowel': 0.998654, 'acc_consonant': 0.999003, 'acc_word': 0.996685, 'loss_grapheme': 0.014779, 'loss_vowel': 0.008082, 'loss_consonant': 0.003597, 'loss_word': 0.016215}
SWA>>>:


100%|██████████| 1255/1255 [07:39<00:00,  2.70it/s]



val: {'recall': 0.998449, 'recall_grapheme': 0.997936, 'recall_vowel': 0.998287, 'recall_consonant': 0.999635, 'recall_word': 0.997677, 'acc_grapheme': 0.997906, 'acc_vowel': 0.998704, 'acc_consonant': 0.999452, 'acc_word': 0.997632, 'loss_grapheme': 0.010934, 'loss_vowel': 0.006118, 'loss_consonant': 0.002438, 'loss_word': 0.011852}
CYCLE: 2
    0 | 0.000150 | 160640/160716 | 6.1674 | 5.2896 | 14.20 ||
val: {'recall': 0.997694, 'recall_grapheme': 0.996673, 'recall_vowel': 0.998375, 'recall_consonant': 0.999054, 'recall_word': 0.996153, 'acc_grapheme': 0.996635, 'acc_vowel': 0.998405, 'acc_consonant': 0.998779, 'acc_word': 0.996162, 'loss_grapheme': 0.016575, 'loss_vowel': 0.008763, 'loss_consonant': 0.004649, 'loss_word': 0.018412}
    1 | 0.000299 | 160640/160716 | 11.3982 | 5.3649 | 14.31 |
val: {'recall': 0.997435, 'recall_grapheme': 0.996553, 'recall_vowel': 0.997768, 'recall_consonant': 0.998867, 'recall_word': 0.995956, 'acc_grapheme': 0.996162, 'acc_vowel': 0.998106, 'acc_cons

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa, exist: False

val: {'recall': 0.997597, 'recall_grapheme': 0.996428, 'recall_vowel': 0.998394, 'recall_consonant': 0.999137, 'recall_word': 0.996368, 'acc_grapheme': 0.996735, 'acc_vowel': 0.99843, 'acc_consonant': 0.998953, 'acc_word': 0.996361, 'loss_grapheme': 0.018075, 'loss_vowel': 0.010609, 'loss_consonant': 0.006052, 'loss_word': 0.017806}
CYCLE: 1
    0 | 0.000200 | 160640/160716 | 6.2135 | 5.5802 | 14.02 ||
val: {'recall': 0.996769, 'recall_grapheme': 0.995271, 'recall_vowel': 0.997749, 'recall_consonant': 0.998786, 'recall_word': 0.995084, 'acc_grapheme': 0.995464, 'acc_vowel': 0.997807, 'acc_consonant': 0.998679, 'acc_word': 0.995065, 'loss_grapheme': 0.027358, 'loss_vowel': 0.016393, 'loss_consonant': 0.011132, 'loss_word': 0.02422}
    1 | 0.000199 |

KeyboardInterrupt: 

In [37]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth...
model file: ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth_swa, exist: False

val: {'recall': 0.796988, 'recall_grapheme': 0.763845, 'recall_vowel': 0.863414, 'recall_consonant': 0.796848, 'recall_word': 0.762264, 'acc_grapheme': 0.493819, 'acc_vowel': 0.788904, 'acc_consonant': 0.767645, 'acc_word': 0.316245, 'loss_grapheme': 1.992506, 'loss_vowel': 0.626105, 'loss_consonant': 0.522644, 'loss_word': 3.290409}
CYCLE: 1


  'recall', 'true', average, warn_for)


    0 | 0.000292 | 160640/160716 | 6.7174 | 10.7151 | 13.99 ||
val: {'recall': 0.992886, 'recall_grapheme': 0.989107, 'recall_vowel': 0.996141, 'recall_consonant': 0.997187, 'recall_word': 0.9882, 'acc_grapheme': 0.989607, 'acc_vowel': 0.996087, 'acc_consonant': 0.996087, 'acc_word': 0.988212, 'loss_grapheme': 0.129831, 'loss_vowel': 0.068911, 'loss_consonant': 0.05198, 'loss_word': 0.144907}
###>>>>> saved ./model4-ckps/tf_efficientnet_b4/model4_eb4_fold3_380.pth
    1 | 0.000543 | 160640/160716 | 7.1504 | 7.3232 | 14.15 ||
val: {'recall': 0.992768, 'recall_grapheme': 0.989784, 'recall_vowel': 0.995191, 'recall_consonant': 0.996311, 'recall_word': 0.989335, 'acc_grapheme': 0.99018, 'acc_vowel': 0.995364, 'acc_consonant': 0.995464, 'acc_word': 0.988884, 'loss_grapheme': 0.069323, 'loss_vowel': 0.043396, 'loss_consonant': 0.0354, 'loss_word': 0.072684}
    2 | 0.000476 | 160640/160716 | 16.1799 | 6.6101 | 14.24 |
val: {'recall': 0.994403, 'recall_grapheme': 0.992025, 'recall_vowel': 0.9

KeyboardInterrupt: 

In [44]:
time.time()

1582104595.1300395

In [None]:
#save_model(model, model_file)

In [None]:
del model