In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet
from cvcore.data.auto_augment import RandAugment
from PIL import Image
from utils import bn_update, moving_average, copy_model


In [2]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [3]:
#!ls /home/chec/data/bengali

In [4]:
DATA_DIR = '/home/chec/data/bengali'

In [5]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [7]:
HEIGHT = 137
WIDTH = 236

In [8]:
#import albumentations as albu
def get_train_augs():
    return RandAugment(n=2, m=27)

In [9]:
#plt.imshow(x)

In [10]:
#np.arange(10).copy()

In [11]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        orig_img = img.copy()
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            #img = augs(image=img)['image']
            img = np.asarray(augs(Image.fromarray(img)))
        
        img = np.expand_dims(img, axis=-1)
        orig_img = np.expand_dims(orig_img, axis=-1)
        
        #print('###', img.shape)
        #img = np.concatenate([img, img, img], 2)
        #print('>>>', img.shape)
        
        # taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
        #MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
        #STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]
        
        img = transforms.functional.to_tensor(img)
        orig_img = transforms.functional.to_tensor(orig_img)
        
        #img = transforms.functional.normalize(img, mean=MEAN, std=STD)
        
        if self.test_mode:
            return img
        elif self.train_mode:
            return img, orig_img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
                    
    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')

    train_df = shuffle(train_df, random_state=1234)

    grapheme_words = np.unique(train_df.grapheme.values)
    grapheme_words_dict = {grapheme: i for i, grapheme in enumerate(grapheme_words)}
    train_df['word_label'] = train_df['grapheme'].map(lambda x: grapheme_words_dict[x])

    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [12]:
#train_loader, val_loader = get_train_val_loaders()

In [13]:
#for x in train_loader:
#    print(x)
#    break

# model

In [14]:
from argparse import Namespace
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d
MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

In [15]:
class BengaliNet(nn.Module):
    """
    EfficientNet B0-B8.
    Args:
        cfg (CfgNode): configs
    """
    def __init__(self, model_name='tf_efficientnet_b1'):
        super(BengaliNet, self).__init__()
        pretrained = True
        input_channels = 1
        pool_type = 'avg'
        drop_connect_rate = 0.2
        self.drop_rate = 0.
        cls_head = 'linear'
        num_total_classes = 168 + 11 + 7 + 1295

        backbone = timm.create_model(
            model_name=model_name,
            pretrained=pretrained,
            in_chans=input_channels,
            drop_connect_rate=drop_connect_rate,
        )
        self.conv_stem = backbone.conv_stem
        self.bn1 = backbone.bn1
        self.act1 = backbone.act1
        ### Original blocks ###
        for i in range(len((backbone.blocks))):
            setattr(self, "block{}".format(str(i)), backbone.blocks[i])
        self.conv_head = backbone.conv_head
        self.bn2 = backbone.bn2
        self.act2 = backbone.act2
        self.aux_block5 = backbone.blocks[5]
        self.aux_num_features = self.block5[-1].bn3.num_features
        self.aux_head4 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn4 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act4 = Swish()
        self.aux_head5 = nn.Conv2d(self.aux_num_features, self.aux_num_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn5 = nn.BatchNorm2d(self.aux_num_features * 4)
        self.act5 = Swish()
        self.global_pool = SelectAdaptivePool2d(pool_type=pool_type)
        self.num_features = backbone.num_features * self.global_pool.feat_mult()
        assert cls_head == 'linear'
        if cls_head == "linear":
            ### Baseline head ###
            self.fc = nn.Linear(self.num_features, num_total_classes)            
            self.aux_fc1 = nn.Linear(self.aux_num_features*4, num_total_classes)
            self.aux_fc2 = nn.Linear(self.aux_num_features*4, num_total_classes)
            
            for fc in [self.fc, self.aux_fc1, self.aux_fc2]:
                nn.init.zeros_(fc.bias.data)
        elif cls_head == "norm_softmax":
            ### NormSoftmax ###
            self.grapheme_fc = NormSoftmax(self.num_features, num_grapheme_classes)
            self.consonant_fc = NormSoftmax(self.num_features, num_consonant_classes)
            self.vowel_fc = NormSoftmax(self.num_features, num_vowel_classes)
        # Replace with Mish activation
        #if cfg.MODEL_ACTIVATION == "mish":
        #    convert_swish_to_mish(self)
        del backbone

    def _features(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.block0(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x); b4 = x
        x = self.block5(x); b4 = self.aux_block5(b4); b5 = x
        x = self.block6(x)
        x = self.conv_head(x); b4 = self.aux_head4(b4); b5 = self.aux_head5(b5)
        x = self.bn2(x); b4 = self.bn4(b4); b5 = self.bn5(b5)
        x = self.act2(x); b4 = self.act4(b4); b5 = self.act5(b5)
        return b4, b5, x

    def forward(self, x):
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)

        # _, _, x = self._features(x)
        b4, b5, x = self._features(x)
        x = self.global_pool(x); b4 = self.global_pool(b4); b5 = self.global_pool(b5)
        x = torch.flatten(x, 1); b4 = torch.flatten(b4, 1); b5 = torch.flatten(b5, 1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        logits = self.fc(x)
        
        aux_logits1 = self.aux_fc1(b4)
        aux_logits2 = self.aux_fc2(b5)
        
        return logits, aux_logits1, aux_logits2

In [16]:
MODEL_DIR = './model4-ckps'
def create_model(args):
    model = BengaliNet(args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [17]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [18]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [19]:
round(1/9, 6)

0.111111

In [20]:
import numpy as np
import sklearn.metrics
import torch


def calc_metrics(preds0, preds1, preds2, preds3, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2) == len(preds3)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    recall_word = sklearn.metrics.recall_score(preds3, y[:, 3], average='macro')
    
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    metrics['recall_word'] = round(recall_word, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    metrics['acc_word'] = round((preds3 == y[:, 3]).sum() / len(y), 6)    
    
    return metrics

In [21]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    loss3 = F.cross_entropy(outputs[3], y_true[:, 3], reduction='mean')
    
    return loss0 + loss1 + loss2 + loss3 #, loss0.item(), loss1.item(), loss2.item()
    #return loss3

In [22]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2, loss3 = 0., 0., 0., 0.
    preds0, preds1, preds2, preds3 = [], [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs, outputs_aux1, outputs_aux2 = model(x)
            #avg_outputs = torch.mean(torch.stack([outputs, outputs_aux1, outputs_aux2], 0), 0)
            outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            preds3.append(torch.max(outputs[3], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            loss3 += F.cross_entropy(outputs[3], y[:, 3], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    preds3 = torch.cat(preds3, 0).cpu().numpy()
    
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, preds3, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    metrics['loss_word'] = round(loss3 / val_loader.num, 6)
    
    return metrics
            

In [23]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [24]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [25]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [26]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [27]:
np.random.random()

0.5157008222205659

In [28]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam
from gridmask import GridMask

In [29]:
from cvcore.solver import WarmupCyclicalLR
def make_optimizer(model, base_lr=4e-4, weight_decay=0., weight_decay_bias=0., epsilon=1e-3):
    """
    Create optimizer with per-layer learning rate and weight decay.
    """
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = base_lr
        params += [{"params": [value], "lr": lr, "weight_decay": weight_decay_bias if 'bias' in key else weight_decay}]
    
    optimizer = torch.optim.AdamW(params, lr, eps=epsilon)
    return optimizer

In [30]:
def train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid):
    train_loss = 0

    for batch_idx, (img, orig_img, targets) in enumerate(train_loader):
        img, orig_img, targets  = img.cuda(), orig_img.cuda(), targets.cuda()
        batch_size = img.size(0)
        r = np.random.rand()

        if r < 0.3:
            # generate mixed sample
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(img.size()[0]).cuda()
            target_a = targets
            target_b = targets[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
            #img[:, :, bby1:bby2, bbx1:bbx2] = img[rand_index, :, bby1:bby2, bbx1:bbx2] #for new cutmix
            img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
            
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
            # compute output
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            loss_aux1 = criterion(outputs_aux1, target_a) * lam + criterion(outputs_aux1, target_b) * (1. - lam)
            loss_aux2 = criterion(outputs_aux2, target_a) * lam + criterion(outputs_aux2, target_b) * (1. - lam)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        elif r > 0.7:
            img = grid(img)
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, targets)
            loss_aux1 = criterion(outputs_aux1, targets)
            loss_aux2 = criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        else:
            orig_img, targets = mixup(orig_img, targets)
            outputs, outputs_aux1, outputs_aux2 = model(orig_img)
            loss_primary = mixup_criterion(outputs, targets)
            loss_aux1 = mixup_criterion(outputs_aux1, targets)
            loss_aux2 = mixup_criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
            #loss = criterion(outputs, targets)

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        #loss.backward()
        lr_scheduler(optimizer, batch_idx, epoch)
        optimizer.step()            
        
        current_lr = get_lrs(optimizer)

        train_loss += loss.item()
        print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
            epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
            loss.item(), train_loss/(batch_idx+1)), end='')


In [31]:
best_metrics = 0.

def validate_and_save(model, model_file, val_loader, save=False):
    global best_metrics
    best_key = 'recall'
    val_metrics = validate(model, val_loader)
    print('\nval:', val_metrics)
    
    if val_metrics[best_key] > best_metrics:
        best_metrics = val_metrics[best_key]
        if save:
            save_model(model, model_file)
            print('###>>>>> saved', model_file)
    model.train()
    

def train(args):
    model, model_file = create_model(args)
    model = model.cuda()

    swa_model, _ = create_model(args)
    swa_model = swa_model.cuda()
    swa_model_file = model_file

    optimizer = make_optimizer(model)
    lr_scheduler = WarmupCyclicalLR(
        "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)
    
    [model, swa_model], optimizer = amp.initialize([model, swa_model], optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        swa_model = nn.DataParallel(swa_model)
    
    validate_and_save(model, model_file, val_loader, save=False)
    
    for cycle in range(1, args.num_cycles+1):
        print('CYCLE:', cycle)
        grid = GridMask(64, 128, rotate=15, ratio=0.6, mode=1, prob=1.)

        for epoch in range(args.start_epoch, args.num_epochs):
            grid.set_prob(epoch, args.st_epochs)
            train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid)
            validate_and_save(model, model_file, val_loader, save=True)
            
            if (epoch+1) == args.swa_start and cycle == 1:
                copy_model(swa_model, model)
                swa_n = 0
            if (epoch+1) >= args.swa_start and (epoch+1) % args.swa_freq == 0:
                print('SWA>>>:')
                moving_average(swa_model, model, 1.0 / (swa_n + 1))
                swa_n += 1
                bn_update(train_loader, swa_model)
                validate_and_save(swa_model, swa_model_file, val_loader, save=True)

        #args.base_lr = 4e-4
        # reset scheduler at each cycle
        lr_scheduler = WarmupCyclicalLR(
            "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)

In [32]:
args = Namespace()
args.backbone = 'tf_efficientnet_b1'
args.ckp_name = 'model4_eb1_fold2.pth'

args.base_lr = 3e-4
args.num_epochs = 100
args.start_epoch = 0
args.warmup_epochs = 10

args.num_cycles = 100
args.batch_size = 1024
args.val_batch_size = 2048
args.st_epochs = 10

args.swa_start = 20
args.swa_freq = 5

args.beta = 1.0
args.cutmix_prob = 0.5

In [33]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=2)

(200840, 6)
(200840, 32332)
(160678, 6) (40162, 6)


In [34]:
#for param in model.backbone.parameters():
#    param.requires_grad = False

In [35]:
train(args)

model file: ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth...
model file: ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth, exist: True
loading ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth...

val: {'recall': 0.990337, 'recall_grapheme': 0.987332, 'recall_vowel': 0.994154, 'recall_consonant': 0.99253, 'recall_word': 0.986458, 'acc_grapheme': 0.986181, 'acc_vowel': 0.994796, 'acc_consonant': 0.994771, 'acc_word': 0.986505, 'loss_grapheme': 0.060364, 'loss_vowel': 0.027494, 'loss_consonant': 0.025339, 'loss_word': 0.056859}
CYCLE: 1
    0 | 0.000030 | 159744/160678 | 9.4789 | 7.5058 ||
val: {'recall': 0.9897, 'recall_grapheme': 0.98597, 'recall_vowel': 0.994533, 'recall_consonant': 0.992325, 'recall_word': 0.984844, 'acc_grapheme': 0.984861, 'acc_vowel': 0.994771, 'acc_consonant': 0.994273, 'acc_word': 0.984787, 'loss_grapheme': 0.093394, 'loss_vowel': 0.048771, 'loss_consonant': 0.039665, 'lo

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.988936, 'recall_grapheme': 0.985005, 'recall_vowel': 0.994368, 'recall_consonant': 0.991367, 'recall_word': 0.983945, 'acc_grapheme': 0.983318, 'acc_vowel': 0.994672, 'acc_consonant': 0.993701, 'acc_word': 0.983865, 'loss_grapheme': 0.086683, 'loss_vowel': 0.04361, 'loss_consonant': 0.037653, 'loss_word': 0.08012}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.16it/s]



val: {'recall': 0.990259, 'recall_grapheme': 0.987308, 'recall_vowel': 0.994358, 'recall_consonant': 0.992061, 'recall_word': 0.985989, 'acc_grapheme': 0.986355, 'acc_vowel': 0.994771, 'acc_consonant': 0.994622, 'acc_word': 0.986007, 'loss_grapheme': 0.050811, 'loss_vowel': 0.021157, 'loss_consonant': 0.018222, 'loss_word': 0.050973}
   20 | 0.000269 | 159744/160678 | 1.3526 | 8.2948 |||
val: {'recall': 0.989608, 'recall_grapheme': 0.985785, 'recall_vowel': 0.994008, 'recall_consonant': 0.992855, 'recall_word': 0.984754, 'acc_grapheme': 0.984986, 'acc_vowel': 0.994622, 'acc_consonant': 0.993974, 'acc_word': 0.984712, 'loss_grapheme': 0.089215, 'loss_vowel': 0.048105, 'loss_consonant': 0.039147, 'loss_word': 0.077621}
   21 | 0.000266 | 159744/160678 | 17.4389 | 7.9859 |
val: {'recall': 0.98894, 'recall_grapheme': 0.984428, 'recall_vowel': 0.993729, 'recall_consonant': 0.993175, 'recall_word': 0.983352, 'acc_grapheme': 0.982919, 'acc_vowel': 0.994323, 'acc_consonant': 0.993725, 'acc_wo

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.988296, 'recall_grapheme': 0.98345, 'recall_vowel': 0.993721, 'recall_consonant': 0.992562, 'recall_word': 0.982441, 'acc_grapheme': 0.982421, 'acc_vowel': 0.994149, 'acc_consonant': 0.993576, 'acc_word': 0.982371, 'loss_grapheme': 0.136336, 'loss_vowel': 0.071931, 'loss_consonant': 0.059342, 'loss_word': 0.121907}
SWA>>>:


100%|██████████| 156/156 [01:17<00:00,  2.19it/s]



val: {'recall': 0.990523, 'recall_grapheme': 0.987284, 'recall_vowel': 0.994374, 'recall_consonant': 0.993151, 'recall_word': 0.98644, 'acc_grapheme': 0.986505, 'acc_vowel': 0.994796, 'acc_consonant': 0.994921, 'acc_word': 0.98653, 'loss_grapheme': 0.04935, 'loss_vowel': 0.020715, 'loss_consonant': 0.017762, 'loss_word': 0.049699}
###>>>>> saved ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth
   25 | 0.000253 | 159744/160678 | 1.8741 | 9.1208 |||
val: {'recall': 0.989504, 'recall_grapheme': 0.985249, 'recall_vowel': 0.994153, 'recall_consonant': 0.993362, 'recall_word': 0.984132, 'acc_grapheme': 0.98389, 'acc_vowel': 0.994647, 'acc_consonant': 0.993626, 'acc_word': 0.984189, 'loss_grapheme': 0.12757, 'loss_vowel': 0.075535, 'loss_consonant': 0.057451, 'loss_word': 0.107479}
   26 | 0.000249 | 159744/160678 | 8.0163 | 8.0592 ||
val: {'recall': 0.989168, 'recall_grapheme': 0.985324, 'recall_vowel': 0.993484, 'recall_consonant': 0.992538, 'recall_word': 0.983632, 'acc_grapheme': 0.

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.9889, 'recall_grapheme': 0.985202, 'recall_vowel': 0.99384, 'recall_consonant': 0.991354, 'recall_word': 0.98418, 'acc_grapheme': 0.984314, 'acc_vowel': 0.994423, 'acc_consonant': 0.994099, 'acc_word': 0.984164, 'loss_grapheme': 0.092515, 'loss_vowel': 0.050087, 'loss_consonant': 0.041369, 'loss_word': 0.082299}
SWA>>>:


100%|██████████| 156/156 [01:17<00:00,  2.26it/s]



val: {'recall': 0.990405, 'recall_grapheme': 0.987344, 'recall_vowel': 0.994453, 'recall_consonant': 0.992477, 'recall_word': 0.986534, 'acc_grapheme': 0.986679, 'acc_vowel': 0.994746, 'acc_consonant': 0.994871, 'acc_word': 0.986629, 'loss_grapheme': 0.048845, 'loss_vowel': 0.020465, 'loss_consonant': 0.017512, 'loss_word': 0.049324}
   30 | 0.000234 | 159744/160678 | 4.0542 | 8.5301 |||
val: {'recall': 0.988301, 'recall_grapheme': 0.98392, 'recall_vowel': 0.993878, 'recall_consonant': 0.991488, 'recall_word': 0.982791, 'acc_grapheme': 0.982795, 'acc_vowel': 0.994223, 'acc_consonant': 0.993377, 'acc_word': 0.982595, 'loss_grapheme': 0.131153, 'loss_vowel': 0.066151, 'loss_consonant': 0.052463, 'loss_word': 0.115094}
   31 | 0.000230 | 159744/160678 | 7.5961 | 7.6611 |||
val: {'recall': 0.988946, 'recall_grapheme': 0.985046, 'recall_vowel': 0.993665, 'recall_consonant': 0.992026, 'recall_word': 0.983982, 'acc_grapheme': 0.983766, 'acc_vowel': 0.994398, 'acc_consonant': 0.994074, 'acc_w

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.989462, 'recall_grapheme': 0.985308, 'recall_vowel': 0.99446, 'recall_consonant': 0.992773, 'recall_word': 0.984418, 'acc_grapheme': 0.984463, 'acc_vowel': 0.994821, 'acc_consonant': 0.994099, 'acc_word': 0.984438, 'loss_grapheme': 0.087647, 'loss_vowel': 0.042956, 'loss_consonant': 0.036811, 'loss_word': 0.07858}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.22it/s]



val: {'recall': 0.990528, 'recall_grapheme': 0.987228, 'recall_vowel': 0.994569, 'recall_consonant': 0.993086, 'recall_word': 0.986522, 'acc_grapheme': 0.986754, 'acc_vowel': 0.994846, 'acc_consonant': 0.994945, 'acc_word': 0.986629, 'loss_grapheme': 0.048186, 'loss_vowel': 0.020125, 'loss_consonant': 0.017279, 'loss_word': 0.048626}
###>>>>> saved ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth
   35 | 0.000214 | 159744/160678 | 14.2309 | 8.0334 ||
val: {'recall': 0.989974, 'recall_grapheme': 0.985437, 'recall_vowel': 0.994838, 'recall_consonant': 0.994185, 'recall_word': 0.984454, 'acc_grapheme': 0.984214, 'acc_vowel': 0.99497, 'acc_consonant': 0.993999, 'acc_word': 0.984388, 'loss_grapheme': 0.112981, 'loss_vowel': 0.064195, 'loss_consonant': 0.05201, 'loss_word': 0.100207}
   36 | 0.000210 | 159744/160678 | 19.6993 | 7.6900 |
val: {'recall': 0.98959, 'recall_grapheme': 0.985444, 'recall_vowel': 0.993779, 'recall_consonant': 0.993694, 'recall_word': 0.984512, 'acc_grapheme': 

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.989023, 'recall_grapheme': 0.984342, 'recall_vowel': 0.994443, 'recall_consonant': 0.992963, 'recall_word': 0.983748, 'acc_grapheme': 0.983118, 'acc_vowel': 0.994696, 'acc_consonant': 0.99375, 'acc_word': 0.983691, 'loss_grapheme': 0.17906, 'loss_vowel': 0.121054, 'loss_consonant': 0.082317, 'loss_word': 0.16104}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.20it/s]



val: {'recall': 0.990482, 'recall_grapheme': 0.987076, 'recall_vowel': 0.994555, 'recall_consonant': 0.993222, 'recall_word': 0.986593, 'acc_grapheme': 0.986779, 'acc_vowel': 0.994821, 'acc_consonant': 0.99502, 'acc_word': 0.986704, 'loss_grapheme': 0.04847, 'loss_vowel': 0.020214, 'loss_consonant': 0.017444, 'loss_word': 0.048813}
   40 | 0.000192 | 159744/160678 | 1.3636 | 8.1943 |||
val: {'recall': 0.989221, 'recall_grapheme': 0.985267, 'recall_vowel': 0.994034, 'recall_consonant': 0.992315, 'recall_word': 0.984422, 'acc_grapheme': 0.984289, 'acc_vowel': 0.994672, 'acc_consonant': 0.994198, 'acc_word': 0.984413, 'loss_grapheme': 0.094775, 'loss_vowel': 0.050315, 'loss_consonant': 0.041422, 'loss_word': 0.083259}
   41 | 0.000187 | 159744/160678 | 11.6867 | 8.1244 |
val: {'recall': 0.989693, 'recall_grapheme': 0.985799, 'recall_vowel': 0.994763, 'recall_consonant': 0.992412, 'recall_word': 0.985061, 'acc_grapheme': 0.985085, 'acc_vowel': 0.99512, 'acc_consonant': 0.994348, 'acc_word

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.989256, 'recall_grapheme': 0.985324, 'recall_vowel': 0.994337, 'recall_consonant': 0.992042, 'recall_word': 0.983813, 'acc_grapheme': 0.98384, 'acc_vowel': 0.994622, 'acc_consonant': 0.993427, 'acc_word': 0.983741, 'loss_grapheme': 0.104321, 'loss_vowel': 0.054441, 'loss_consonant': 0.042862, 'loss_word': 0.093389}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.25it/s]



val: {'recall': 0.990604, 'recall_grapheme': 0.987275, 'recall_vowel': 0.994623, 'recall_consonant': 0.993244, 'recall_word': 0.986541, 'acc_grapheme': 0.986928, 'acc_vowel': 0.994821, 'acc_consonant': 0.99502, 'acc_word': 0.986629, 'loss_grapheme': 0.048175, 'loss_vowel': 0.020155, 'loss_consonant': 0.017371, 'loss_word': 0.048402}
###>>>>> saved ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth
   45 | 0.000169 | 159744/160678 | 9.8821 | 8.0816 ||
val: {'recall': 0.988767, 'recall_grapheme': 0.984553, 'recall_vowel': 0.99404, 'recall_consonant': 0.991923, 'recall_word': 0.983402, 'acc_grapheme': 0.983168, 'acc_vowel': 0.994497, 'acc_consonant': 0.993775, 'acc_word': 0.983293, 'loss_grapheme': 0.142123, 'loss_vowel': 0.083155, 'loss_consonant': 0.061987, 'loss_word': 0.122737}
   46 | 0.000164 | 159744/160678 | 14.7486 | 7.7555 |
val: {'recall': 0.988536, 'recall_grapheme': 0.984106, 'recall_vowel': 0.994245, 'recall_consonant': 0.991688, 'recall_word': 0.983444, 'acc_grapheme': 

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.989493, 'recall_grapheme': 0.985365, 'recall_vowel': 0.994385, 'recall_consonant': 0.992858, 'recall_word': 0.983786, 'acc_grapheme': 0.983691, 'acc_vowel': 0.994597, 'acc_consonant': 0.993725, 'acc_word': 0.983716, 'loss_grapheme': 0.115793, 'loss_vowel': 0.066759, 'loss_consonant': 0.051506, 'loss_word': 0.103687}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.09it/s]



val: {'recall': 0.99067, 'recall_grapheme': 0.987273, 'recall_vowel': 0.994997, 'recall_consonant': 0.993139, 'recall_word': 0.986673, 'acc_grapheme': 0.987028, 'acc_vowel': 0.994896, 'acc_consonant': 0.995045, 'acc_word': 0.986754, 'loss_grapheme': 0.04789, 'loss_vowel': 0.020027, 'loss_consonant': 0.017247, 'loss_word': 0.048142}
###>>>>> saved ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth
   50 | 0.000145 | 159744/160678 | 8.1429 | 8.1141 |||
val: {'recall': 0.988597, 'recall_grapheme': 0.983627, 'recall_vowel': 0.994229, 'recall_consonant': 0.992904, 'recall_word': 0.982406, 'acc_grapheme': 0.982322, 'acc_vowel': 0.994522, 'acc_consonant': 0.993302, 'acc_word': 0.982247, 'loss_grapheme': 0.142553, 'loss_vowel': 0.072437, 'loss_consonant': 0.058442, 'loss_word': 0.127221}
   51 | 0.000141 | 159744/160678 | 6.6190 | 8.6466 ||
val: {'recall': 0.989179, 'recall_grapheme': 0.984598, 'recall_vowel': 0.994363, 'recall_consonant': 0.993156, 'recall_word': 0.983238, 'acc_grapheme':

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.988859, 'recall_grapheme': 0.984429, 'recall_vowel': 0.993731, 'recall_consonant': 0.992847, 'recall_word': 0.982388, 'acc_grapheme': 0.982745, 'acc_vowel': 0.994398, 'acc_consonant': 0.993427, 'acc_word': 0.982197, 'loss_grapheme': 0.15828, 'loss_vowel': 0.089957, 'loss_consonant': 0.06743, 'loss_word': 0.142371}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.09it/s]



val: {'recall': 0.990735, 'recall_grapheme': 0.987324, 'recall_vowel': 0.994994, 'recall_consonant': 0.993297, 'recall_word': 0.986861, 'acc_grapheme': 0.987077, 'acc_vowel': 0.994896, 'acc_consonant': 0.995145, 'acc_word': 0.986953, 'loss_grapheme': 0.047679, 'loss_vowel': 0.019918, 'loss_consonant': 0.017206, 'loss_word': 0.047904}
###>>>>> saved ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth
   55 | 0.000122 | 159744/160678 | 8.7599 | 7.8213 |||
val: {'recall': 0.988512, 'recall_grapheme': 0.983831, 'recall_vowel': 0.994164, 'recall_consonant': 0.992221, 'recall_word': 0.982757, 'acc_grapheme': 0.982844, 'acc_vowel': 0.994472, 'acc_consonant': 0.993452, 'acc_word': 0.982571, 'loss_grapheme': 0.093986, 'loss_vowel': 0.041215, 'loss_consonant': 0.037965, 'loss_word': 0.086272}
   56 | 0.000117 | 159744/160678 | 1.5579 | 7.5881 |||
val: {'recall': 0.989571, 'recall_grapheme': 0.985042, 'recall_vowel': 0.994847, 'recall_consonant': 0.993353, 'recall_word': 0.984827, 'acc_graphem

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.988833, 'recall_grapheme': 0.984511, 'recall_vowel': 0.994377, 'recall_consonant': 0.991932, 'recall_word': 0.98303, 'acc_grapheme': 0.983218, 'acc_vowel': 0.994771, 'acc_consonant': 0.993651, 'acc_word': 0.982894, 'loss_grapheme': 0.121664, 'loss_vowel': 0.063489, 'loss_consonant': 0.047925, 'loss_word': 0.11098}
SWA>>>:


100%|██████████| 156/156 [01:17<00:00,  2.22it/s]



val: {'recall': 0.990658, 'recall_grapheme': 0.987222, 'recall_vowel': 0.994908, 'recall_consonant': 0.993281, 'recall_word': 0.98685, 'acc_grapheme': 0.987003, 'acc_vowel': 0.994921, 'acc_consonant': 0.99512, 'acc_word': 0.986953, 'loss_grapheme': 0.047559, 'loss_vowel': 0.019896, 'loss_consonant': 0.017145, 'loss_word': 0.047745}
   60 | 0.000099 | 159744/160678 | 4.6438 | 8.5090 ||
val: {'recall': 0.988982, 'recall_grapheme': 0.984513, 'recall_vowel': 0.994432, 'recall_consonant': 0.99247, 'recall_word': 0.983044, 'acc_grapheme': 0.98277, 'acc_vowel': 0.994398, 'acc_consonant': 0.993725, 'acc_word': 0.982844, 'loss_grapheme': 0.146558, 'loss_vowel': 0.073358, 'loss_consonant': 0.058898, 'loss_word': 0.128465}
   61 | 0.000095 | 159744/160678 | 10.6517 | 7.0547 |
val: {'recall': 0.989536, 'recall_grapheme': 0.985423, 'recall_vowel': 0.994394, 'recall_consonant': 0.992903, 'recall_word': 0.984706, 'acc_grapheme': 0.984662, 'acc_vowel': 0.994896, 'acc_consonant': 0.994373, 'acc_word':

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.989267, 'recall_grapheme': 0.98476, 'recall_vowel': 0.994137, 'recall_consonant': 0.99341, 'recall_word': 0.984396, 'acc_grapheme': 0.98394, 'acc_vowel': 0.994846, 'acc_consonant': 0.994223, 'acc_word': 0.984338, 'loss_grapheme': 0.089384, 'loss_vowel': 0.046607, 'loss_consonant': 0.036563, 'loss_word': 0.081633}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.21it/s]



val: {'recall': 0.990633, 'recall_grapheme': 0.987103, 'recall_vowel': 0.995025, 'recall_consonant': 0.993301, 'recall_word': 0.986872, 'acc_grapheme': 0.986853, 'acc_vowel': 0.994921, 'acc_consonant': 0.995219, 'acc_word': 0.986978, 'loss_grapheme': 0.047322, 'loss_vowel': 0.019783, 'loss_consonant': 0.017022, 'loss_word': 0.047532}
   65 | 0.000078 | 159744/160678 | 18.8463 | 7.9064 ||
val: {'recall': 0.988745, 'recall_grapheme': 0.983924, 'recall_vowel': 0.994189, 'recall_consonant': 0.992942, 'recall_word': 0.983294, 'acc_grapheme': 0.983069, 'acc_vowel': 0.994522, 'acc_consonant': 0.9939, 'acc_word': 0.983218, 'loss_grapheme': 0.137053, 'loss_vowel': 0.074878, 'loss_consonant': 0.056614, 'loss_word': 0.118922}
   66 | 0.000074 | 159744/160678 | 20.9526 | 8.9481 |
val: {'recall': 0.989395, 'recall_grapheme': 0.985209, 'recall_vowel': 0.994419, 'recall_consonant': 0.992742, 'recall_word': 0.983909, 'acc_grapheme': 0.983865, 'acc_vowel': 0.994896, 'acc_consonant': 0.994099, 'acc_wor

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.988821, 'recall_grapheme': 0.984254, 'recall_vowel': 0.994623, 'recall_consonant': 0.992151, 'recall_word': 0.98317, 'acc_grapheme': 0.982994, 'acc_vowel': 0.994846, 'acc_consonant': 0.993925, 'acc_word': 0.982969, 'loss_grapheme': 0.157407, 'loss_vowel': 0.088928, 'loss_consonant': 0.065717, 'loss_word': 0.138219}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.19it/s]



val: {'recall': 0.990765, 'recall_grapheme': 0.987321, 'recall_vowel': 0.995107, 'recall_consonant': 0.993313, 'recall_word': 0.9869, 'acc_grapheme': 0.987028, 'acc_vowel': 0.99497, 'acc_consonant': 0.995269, 'acc_word': 0.987003, 'loss_grapheme': 0.047143, 'loss_vowel': 0.019707, 'loss_consonant': 0.016946, 'loss_word': 0.047352}
###>>>>> saved ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth
   70 | 0.000058 | 159744/160678 | 1.4352 | 7.4325 |||
val: {'recall': 0.989979, 'recall_grapheme': 0.986158, 'recall_vowel': 0.995098, 'recall_consonant': 0.992499, 'recall_word': 0.985187, 'acc_grapheme': 0.985235, 'acc_vowel': 0.99517, 'acc_consonant': 0.994647, 'acc_word': 0.985235, 'loss_grapheme': 0.082649, 'loss_vowel': 0.04285, 'loss_consonant': 0.035072, 'loss_word': 0.074061}
   71 | 0.000054 | 159744/160678 | 9.1432 | 8.8498 ||
val: {'recall': 0.989769, 'recall_grapheme': 0.985506, 'recall_vowel': 0.994795, 'recall_consonant': 0.993267, 'recall_word': 0.984611, 'acc_grapheme': 0.

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.990407, 'recall_grapheme': 0.986887, 'recall_vowel': 0.994903, 'recall_consonant': 0.992953, 'recall_word': 0.986495, 'acc_grapheme': 0.986181, 'acc_vowel': 0.99517, 'acc_consonant': 0.99497, 'acc_word': 0.986554, 'loss_grapheme': 0.071698, 'loss_vowel': 0.036856, 'loss_consonant': 0.030908, 'loss_word': 0.065154}
SWA>>>:


100%|██████████| 156/156 [01:19<00:00,  2.10it/s]



val: {'recall': 0.990837, 'recall_grapheme': 0.987494, 'recall_vowel': 0.99501, 'recall_consonant': 0.993349, 'recall_word': 0.986901, 'acc_grapheme': 0.987052, 'acc_vowel': 0.994995, 'acc_consonant': 0.995319, 'acc_word': 0.987003, 'loss_grapheme': 0.046995, 'loss_vowel': 0.019641, 'loss_consonant': 0.016824, 'loss_word': 0.047232}
###>>>>> saved ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth
   75 | 0.000041 | 159744/160678 | 9.6152 | 7.6573 ||
val: {'recall': 0.988524, 'recall_grapheme': 0.98377, 'recall_vowel': 0.994311, 'recall_consonant': 0.992246, 'recall_word': 0.982668, 'acc_grapheme': 0.982147, 'acc_vowel': 0.994721, 'acc_consonant': 0.99375, 'acc_word': 0.982496, 'loss_grapheme': 0.173966, 'loss_vowel': 0.099902, 'loss_consonant': 0.074183, 'loss_word': 0.149593}
   76 | 0.000038 | 159744/160678 | 5.6551 | 7.9571 |||
val: {'recall': 0.989448, 'recall_grapheme': 0.98506, 'recall_vowel': 0.994526, 'recall_consonant': 0.993147, 'recall_word': 0.984442, 'acc_grapheme': 0

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.989344, 'recall_grapheme': 0.985257, 'recall_vowel': 0.994543, 'recall_consonant': 0.99232, 'recall_word': 0.984742, 'acc_grapheme': 0.984289, 'acc_vowel': 0.99497, 'acc_consonant': 0.994149, 'acc_word': 0.984712, 'loss_grapheme': 0.099024, 'loss_vowel': 0.052992, 'loss_consonant': 0.043358, 'loss_word': 0.085514}
SWA>>>:


100%|██████████| 156/156 [01:19<00:00,  2.27it/s]



val: {'recall': 0.990749, 'recall_grapheme': 0.987258, 'recall_vowel': 0.995147, 'recall_consonant': 0.993331, 'recall_word': 0.986846, 'acc_grapheme': 0.987077, 'acc_vowel': 0.995045, 'acc_consonant': 0.995244, 'acc_word': 0.986928, 'loss_grapheme': 0.046931, 'loss_vowel': 0.019604, 'loss_consonant': 0.016838, 'loss_word': 0.047109}
   80 | 0.000026 | 159744/160678 | 15.3885 | 7.5373 |
val: {'recall': 0.98957, 'recall_grapheme': 0.985631, 'recall_vowel': 0.99444, 'recall_consonant': 0.992579, 'recall_word': 0.984922, 'acc_grapheme': 0.984662, 'acc_vowel': 0.99497, 'acc_consonant': 0.994373, 'acc_word': 0.984911, 'loss_grapheme': 0.101753, 'loss_vowel': 0.056557, 'loss_consonant': 0.044298, 'loss_word': 0.090373}
   81 | 0.000023 | 159744/160678 | 1.7577 | 7.8793 ||
val: {'recall': 0.990522, 'recall_grapheme': 0.986996, 'recall_vowel': 0.994571, 'recall_consonant': 0.993525, 'recall_word': 0.986112, 'acc_grapheme': 0.985783, 'acc_vowel': 0.994945, 'acc_consonant': 0.995145, 'acc_word'

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.98992, 'recall_grapheme': 0.98589, 'recall_vowel': 0.994457, 'recall_consonant': 0.993442, 'recall_word': 0.984912, 'acc_grapheme': 0.984886, 'acc_vowel': 0.994871, 'acc_consonant': 0.994647, 'acc_word': 0.984986, 'loss_grapheme': 0.149554, 'loss_vowel': 0.092069, 'loss_consonant': 0.067512, 'loss_word': 0.130215}
SWA>>>:


100%|██████████| 156/156 [01:17<00:00,  2.17it/s]



val: {'recall': 0.990874, 'recall_grapheme': 0.987456, 'recall_vowel': 0.995082, 'recall_consonant': 0.993501, 'recall_word': 0.98687, 'acc_grapheme': 0.987177, 'acc_vowel': 0.99502, 'acc_consonant': 0.995319, 'acc_word': 0.986928, 'loss_grapheme': 0.046695, 'loss_vowel': 0.019567, 'loss_consonant': 0.016764, 'loss_word': 0.046924}
###>>>>> saved ./model4-ckps/tf_efficientnet_b1/model4_eb1_fold2.pth
   85 | 0.000014 | 159744/160678 | 9.2611 | 8.2875 ||
val: {'recall': 0.989734, 'recall_grapheme': 0.985593, 'recall_vowel': 0.994912, 'recall_consonant': 0.992838, 'recall_word': 0.98458, 'acc_grapheme': 0.984687, 'acc_vowel': 0.995219, 'acc_consonant': 0.994198, 'acc_word': 0.984563, 'loss_grapheme': 0.10875, 'loss_vowel': 0.060622, 'loss_consonant': 0.046919, 'loss_word': 0.092252}
   86 | 0.000012 | 159744/160678 | 9.5796 | 7.8082 |||
val: {'recall': 0.989653, 'recall_grapheme': 0.985559, 'recall_vowel': 0.994945, 'recall_consonant': 0.992549, 'recall_word': 0.98451, 'acc_grapheme': 0.

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.989938, 'recall_grapheme': 0.985733, 'recall_vowel': 0.994999, 'recall_consonant': 0.993287, 'recall_word': 0.985276, 'acc_grapheme': 0.985135, 'acc_vowel': 0.995219, 'acc_consonant': 0.994696, 'acc_word': 0.985285, 'loss_grapheme': 0.083325, 'loss_vowel': 0.043152, 'loss_consonant': 0.035369, 'loss_word': 0.073227}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.17it/s]



val: {'recall': 0.990764, 'recall_grapheme': 0.98732, 'recall_vowel': 0.994979, 'recall_consonant': 0.993435, 'recall_word': 0.98691, 'acc_grapheme': 0.987127, 'acc_vowel': 0.995045, 'acc_consonant': 0.995319, 'acc_word': 0.986978, 'loss_grapheme': 0.046645, 'loss_vowel': 0.019548, 'loss_consonant': 0.016726, 'loss_word': 0.046864}
   90 | 0.000006 | 159744/160678 | 1.2949 | 7.9403 ||
val: {'recall': 0.989849, 'recall_grapheme': 0.985834, 'recall_vowel': 0.99495, 'recall_consonant': 0.992777, 'recall_word': 0.98526, 'acc_grapheme': 0.985085, 'acc_vowel': 0.995219, 'acc_consonant': 0.994672, 'acc_word': 0.985309, 'loss_grapheme': 0.093168, 'loss_vowel': 0.050202, 'loss_consonant': 0.040594, 'loss_word': 0.082058}
   91 | 0.000005 | 159744/160678 | 6.5384 | 7.4562 ||
val: {'recall': 0.989665, 'recall_grapheme': 0.985477, 'recall_vowel': 0.994698, 'recall_consonant': 0.993009, 'recall_word': 0.984725, 'acc_grapheme': 0.984587, 'acc_vowel': 0.995095, 'acc_consonant': 0.994497, 'acc_word':

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.989938, 'recall_grapheme': 0.98582, 'recall_vowel': 0.994737, 'recall_consonant': 0.993374, 'recall_word': 0.984966, 'acc_grapheme': 0.984886, 'acc_vowel': 0.995045, 'acc_consonant': 0.994672, 'acc_word': 0.984911, 'loss_grapheme': 0.116045, 'loss_vowel': 0.066882, 'loss_consonant': 0.049797, 'loss_word': 0.101202}
SWA>>>:


100%|██████████| 156/156 [01:18<00:00,  2.32it/s]



val: {'recall': 0.99087, 'recall_grapheme': 0.987467, 'recall_vowel': 0.995065, 'recall_consonant': 0.993481, 'recall_word': 0.987022, 'acc_grapheme': 0.987152, 'acc_vowel': 0.994995, 'acc_consonant': 0.995394, 'acc_word': 0.987127, 'loss_grapheme': 0.046701, 'loss_vowel': 0.019486, 'loss_consonant': 0.016681, 'loss_word': 0.046889}
   95 | 0.000001 | 159744/160678 | 1.2975 | 8.5468 |||
val: {'recall': 0.990227, 'recall_grapheme': 0.986365, 'recall_vowel': 0.994869, 'recall_consonant': 0.993309, 'recall_word': 0.985665, 'acc_grapheme': 0.985509, 'acc_vowel': 0.995194, 'acc_consonant': 0.994746, 'acc_word': 0.985683, 'loss_grapheme': 0.07892, 'loss_vowel': 0.041762, 'loss_consonant': 0.034488, 'loss_word': 0.069593}
   96 | 0.000001 | 159744/160678 | 10.4923 | 7.7610 ||
val: {'recall': 0.989153, 'recall_grapheme': 0.984478, 'recall_vowel': 0.994497, 'recall_consonant': 0.993158, 'recall_word': 0.984466, 'acc_grapheme': 0.983766, 'acc_vowel': 0.994921, 'acc_consonant': 0.994223, 'acc_wo

  0%|          | 0/156 [00:00<?, ?it/s]


val: {'recall': 0.989921, 'recall_grapheme': 0.985769, 'recall_vowel': 0.994862, 'recall_consonant': 0.993284, 'recall_word': 0.984913, 'acc_grapheme': 0.984787, 'acc_vowel': 0.995095, 'acc_consonant': 0.994497, 'acc_word': 0.984886, 'loss_grapheme': 0.107947, 'loss_vowel': 0.062297, 'loss_consonant': 0.047194, 'loss_word': 0.092843}
SWA>>>:


100%|██████████| 156/156 [01:19<00:00,  2.19it/s]



val: {'recall': 0.990667, 'recall_grapheme': 0.987063, 'recall_vowel': 0.995106, 'recall_consonant': 0.993435, 'recall_word': 0.987038, 'acc_grapheme': 0.986978, 'acc_vowel': 0.995045, 'acc_consonant': 0.995319, 'acc_word': 0.987127, 'loss_grapheme': 0.046485, 'loss_vowel': 0.019401, 'loss_consonant': 0.016649, 'loss_word': 0.04668}
CYCLE: 2
    0 | 0.000030 | 159744/160678 | 12.8697 | 7.6799 ||
val: {'recall': 0.989102, 'recall_grapheme': 0.984462, 'recall_vowel': 0.99448, 'recall_consonant': 0.993005, 'recall_word': 0.983482, 'acc_grapheme': 0.983318, 'acc_vowel': 0.994547, 'acc_consonant': 0.993925, 'acc_word': 0.983442, 'loss_grapheme': 0.145482, 'loss_vowel': 0.073542, 'loss_consonant': 0.060885, 'loss_word': 0.122437}
    1 | 0.000060 | 159744/160678 | 0.8475 | 6.7534 |||
val: {'recall': 0.990684, 'recall_grapheme': 0.987308, 'recall_vowel': 0.995227, 'recall_consonant': 0.992893, 'recall_word': 0.986735, 'acc_grapheme': 0.986729, 'acc_vowel': 0.995344, 'acc_consonant': 0.995045

KeyboardInterrupt: 

In [None]:
#validate(nn.DataParallel(model), val_loader)

In [None]:
train(args)

In [None]:
#save_model(model, model_file)

In [None]:
del model