In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet
from cvcore.data.auto_augment import RandAugment
from PIL import Image
from utils import bn_update, moving_average, copy_model


In [2]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [3]:
#!ls /home/chec/data/bengali

In [4]:
DATA_DIR = '/home/chec/data/bengali'

In [5]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [6]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [7]:
HEIGHT = 137
WIDTH = 236

In [8]:
from cvcore.data.auto_augment import Invert, RandAugment, AugmentAndMix
from albumentations.core.transforms_interface import ImageOnlyTransform
from augmix import RandomAugMix
import albumentations as albu

class RandAug(ImageOnlyTransform):
    def __init__(self, always_apply=False, p=1.):
        super().__init__(always_apply, p)
        self.aug = RandAugment(n=2, m=27)

    def apply(self, image, **params):
        return np.asarray(self.aug(Image.fromarray(image))).astype(np.uint8)

train_aug = albu.Compose([
    RandAug(p=1.),
    #RandomAugMix(severity=3, width=3, alpha=1., p=1.)
])

In [9]:
def get_train_augs():
    #return RandAugment(n=2, m=27)
    return train_aug

In [10]:
#plt.imshow(x)

In [11]:
#np.arange(10).copy()

In [12]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        orig_img = img.copy()
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            img = augs(image=img)['image'].astype(np.uint8)
            #img = np.asarray(augs(Image.fromarray(img)))
        
        img = np.expand_dims(img, axis=-1)
        orig_img = np.expand_dims(orig_img, axis=-1)
        
        #print('###', img.shape)
        #img = np.concatenate([img, img, img], 2)
        #print('>>>', img.shape)
        
        # taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
        #MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
        #STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]
        
        img = transforms.functional.to_tensor(img)
        orig_img = transforms.functional.to_tensor(orig_img)
        
        #img = transforms.functional.normalize(img, mean=MEAN, std=STD)
        
        if self.test_mode:
            return img
        elif self.train_mode:
            return img, orig_img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic, row.word_label])
                    
    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')

    train_df = shuffle(train_df, random_state=1234)

    grapheme_words = np.unique(train_df.grapheme.values)
    grapheme_words_dict = {grapheme: i for i, grapheme in enumerate(grapheme_words)}
    train_df['word_label'] = train_df['grapheme'].map(lambda x: grapheme_words_dict[x])

    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [13]:
#train_loader, val_loader = get_train_val_loaders()

In [14]:
#for x in train_loader:
#    print(x)
#    break

# model

In [15]:
#import pretrainedmodels

In [16]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [17]:
from argparse import Namespace
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d

In [18]:
MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

In [19]:
MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]
import timm
from timm.models.activations import Swish, Mish
from timm.models.adaptive_avgmax_pool import SelectAdaptivePool2d

class BengaliNet4(nn.Module):
    def __init__(self, backbone_name='se_resnext50_32x4d'):
        super(BengaliNet4, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.n_word = 1295
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant + self.n_word
        
        self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained='imagenet')
        self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)
        
        self.num_p2_features = self.backbone.layer2[-1].se_module.fc2.out_channels
        self.num_p3_features = self.backbone.layer3[-1].se_module.fc2.out_channels
        self.p2_head = nn.Conv2d(self.num_p2_features, self.num_p2_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.p3_head = nn.Conv2d(self.num_p3_features, self.num_p3_features * 4, kernel_size=(1, 1), stride=(1, 1), bias=False)
        self.bn2 = nn.BatchNorm2d(self.num_p2_features * 4)
        self.bn3 = nn.BatchNorm2d(self.num_p3_features * 4)
        self.act2 = Swish()
        self.act3 = Swish()
        
        self.fc_aux1 = nn.Linear(self.num_p3_features * 4, self.num_classes)
        self.fc_aux2 = nn.Linear(self.num_p2_features * 4, self.num_classes)
        
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
        for fc in [self.fc, self.fc_aux1, self.fc_aux2]:
            nn.init.zeros_(fc.bias.data)

        print('init model4')
        
    def features(self, x):
        x = self.backbone.layer0(x); #print(x.size())
        x = self.backbone.layer1(x); #print(x.size())
        x = self.backbone.layer2(x); p2 = x; p2 = self.p2_head(p2); p2 = self.bn2(p2); p2 = self.act2(p2) #print(x.size())
        x = self.backbone.layer3(x); p3 = x; p3 = self.p3_head(p3); p3 = self.bn3(p3); p3 = self.act3(p3) #print(x.size())
        x = self.backbone.layer4(x); #print(x.size())
        return x, p2, p3
        
    def logits(self, x, p2, p3):
        x = self.avg_pool(x)
        #x = F.dropout2d(x, 0.2, self.training)
        x = torch.flatten(x, 1)
        
        p2 = self.avg_pool(p2)
        p2 = torch.flatten(p2, 1)
        
        p3 = self.avg_pool(p3)
        p3 = torch.flatten(p3, 1)
        return self.fc(x), self.fc_aux1(p3), self.fc_aux2(p2)
    
    def forward(self, x):
        x = F.interpolate(x, size=(224,224), mode='bilinear', align_corners=False)
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)
        x = torch.cat([x,x,x], 1)
        #x = self.conv0(x)
        #print(x.size())
        x, p2, p3 = self.features(x)
        x, logits_aux1, logits_aux2 = self.logits(x, p2, p3)

        return x, logits_aux1, logits_aux2

In [20]:
MODEL_DIR = './model4-ckps'
def create_model(args):
    model = BengaliNet4(args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [21]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [22]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [23]:
round(1/9, 6)

0.111111

In [24]:
import numpy as np
import sklearn.metrics
import torch


def calc_metrics(preds0, preds1, preds2, preds3, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2) == len(preds3)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    recall_word = sklearn.metrics.recall_score(preds3, y[:, 3], average='macro')
    
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    metrics['recall_word'] = round(recall_word, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    metrics['acc_word'] = round((preds3 == y[:, 3]).sum() / len(y), 6)    
    
    return metrics

In [25]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    loss3 = F.cross_entropy(outputs[3], y_true[:, 3], reduction='mean')
    
    return loss0 + loss1 + loss2 + loss3 #, loss0.item(), loss1.item(), loss2.item()
    #return loss3

In [26]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2, loss3 = 0., 0., 0., 0.
    preds0, preds1, preds2, preds3 = [], [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs, outputs_aux1, outputs_aux2 = model(x)
            #avg_outputs = torch.mean(torch.stack([outputs, outputs_aux1, outputs_aux2], 0), 0)
            outputs = torch.split(outputs, [168, 11, 7, 1295], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            preds3.append(torch.max(outputs[3], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            loss3 += F.cross_entropy(outputs[3], y[:, 3], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    preds3 = torch.cat(preds3, 0).cpu().numpy()
    
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, preds3, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    metrics['loss_word'] = round(loss3 / val_loader.num, 6)
    
    return metrics
            

In [27]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [28]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [29]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [30]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [31]:
np.random.random()

0.4172528007108527

In [32]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam
from gridmask import GridMask

In [33]:
from cvcore.solver import WarmupCyclicalLR
def make_optimizer(model, base_lr=4e-4, weight_decay=0., weight_decay_bias=0., epsilon=1e-3):
    """
    Create optimizer with per-layer learning rate and weight decay.
    """
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = base_lr
        params += [{"params": [value], "lr": lr, "weight_decay": weight_decay_bias if 'bias' in key else weight_decay}]
    
    optimizer = torch.optim.AdamW(params, lr, eps=epsilon)
    return optimizer

In [34]:
def train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid):
    train_loss = 0

    for batch_idx, (img, orig_img, targets) in enumerate(train_loader):
        img, orig_img, targets  = img.cuda(), orig_img.cuda(), targets.cuda()
        batch_size = img.size(0)
        r = np.random.rand()

        if r < 0.5:
            # generate mixed sample
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(img.size()[0]).cuda()
            target_a = targets
            target_b = targets[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
            #img[:, :, bby1:bby2, bbx1:bbx2] = img[rand_index, :, bby1:bby2, bbx1:bbx2] #for new cutmix
            img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
            
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
            # compute output
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            loss_aux1 = criterion(outputs_aux1, target_a) * lam + criterion(outputs_aux1, target_b) * (1. - lam)
            loss_aux2 = criterion(outputs_aux2, target_a) * lam + criterion(outputs_aux2, target_b) * (1. - lam)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        elif r > 1.9: # normal train
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, targets)
            loss_aux1 = criterion(outputs_aux1, targets)
            loss_aux2 = criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        elif r > 1.7: # grid mask
            img = grid(img)
            outputs, outputs_aux1, outputs_aux2 = model(img)
            loss_primary = criterion(outputs, targets)
            loss_aux1 = criterion(outputs_aux1, targets)
            loss_aux2 = criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
        else: # mixup
            orig_img, targets = mixup(orig_img, targets)
            outputs, outputs_aux1, outputs_aux2 = model(orig_img)
            loss_primary = mixup_criterion(outputs, targets)
            loss_aux1 = mixup_criterion(outputs_aux1, targets)
            loss_aux2 = mixup_criterion(outputs_aux2, targets)
            loss = loss_primary + (loss_aux1 + loss_aux2)*0.8
            #loss = criterion(outputs, targets)

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        #loss.backward()
        lr_scheduler(optimizer, batch_idx, epoch)
        optimizer.step()            
        
        current_lr = get_lrs(optimizer)

        train_loss += loss.item()
        print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
            epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
            loss.item(), train_loss/(batch_idx+1)), end='')


In [35]:
best_metrics = 0.

def validate_and_save(model, model_file, val_loader, save=False):
    global best_metrics
    best_key = 'recall'
    val_metrics = validate(model, val_loader)
    print('\nval:', val_metrics)
    
    if val_metrics[best_key] > best_metrics:
        best_metrics = val_metrics[best_key]
        if save:
            save_model(model, model_file)
            print('###>>>>> saved', model_file)
    model.train()
    

def train(args):
    model, model_file = create_model(args)
    model = model.cuda()

    swa_model, _ = create_model(args)
    swa_model = swa_model.cuda()
    swa_model_file = model_file

    optimizer = make_optimizer(model)
    lr_scheduler = WarmupCyclicalLR(
        "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)
    
    [model, swa_model], optimizer = amp.initialize([model, swa_model], optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        swa_model = nn.DataParallel(swa_model)
    
    validate_and_save(model, model_file, val_loader, save=False)
    
    for cycle in range(1, args.num_cycles+1):
        print('CYCLE:', cycle)
        grid = GridMask(64, 128, rotate=15, ratio=0.6, mode=1, prob=1.)
        if os.path.exists(model_file):
            print(f'loading {model_file}...')
            model.module.load_state_dict(torch.load(model_file))

        for epoch in range(args.start_epoch, args.num_epochs):
            grid.set_prob(epoch, args.st_epochs)
            train_epoch(args, model, train_loader, epoch, optimizer, lr_scheduler, grid)
            validate_and_save(model, model_file, val_loader, save=True)
            
            if (epoch+1) == args.swa_start and cycle == 1:
                copy_model(swa_model, model)
                swa_n = 0
            if (epoch+1) >= args.swa_start and (epoch+1) % args.swa_freq == 0:
                print('SWA>>>:')
                moving_average(swa_model, model, 1.0 / (swa_n + 1))
                swa_n += 1
                bn_update(train_loader, swa_model)
                validate_and_save(swa_model, swa_model_file, val_loader, save=True)

        #args.base_lr = 1.5e-4
        #args.num_epochs = 80
        #args.warmup_epochs = 10
        # reset scheduler at each cycle
        lr_scheduler = WarmupCyclicalLR(
            "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)

In [36]:
args = Namespace()
args.backbone = 'se_resnext50_32x4d'
args.ckp_name = 'model4_se_resnext50_fold3_224.pth'

args.base_lr = 4e-5
args.num_epochs = 60
args.start_epoch = 0
args.warmup_epochs = 0

args.num_cycles = 100
args.batch_size = 640
args.val_batch_size = 1024
args.st_epochs = 10

args.swa_start = 0
args.swa_freq = 3

args.beta = 1.0
args.cutmix_prob = 0.5

In [37]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=3)

(200840, 6)
(200840, 32332)
(160716, 6) (40124, 6)


In [38]:
train(args)

init model4
model file: ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth, exist: True
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth...
init model4
model file: ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth, exist: True
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth...

val: {'recall': 0.990901, 'recall_grapheme': 0.986626, 'recall_vowel': 0.994607, 'recall_consonant': 0.995745, 'recall_word': 0.983622, 'acc_grapheme': 0.985445, 'acc_vowel': 0.995265, 'acc_consonant': 0.995863, 'acc_word': 0.983601, 'loss_grapheme': 0.066175, 'loss_vowel': 0.028702, 'loss_consonant': 0.022095, 'loss_word': 0.070521}
CYCLE: 1
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth...
    0 | 0.000040 | 160640/160716 | 7.8883 | 11.1346 ||
val: {'recall': 0.988418, 'recall_grapheme': 0.982352, 'recall_vowel': 0.993159, 'recall_consonant': 0.99581, 'recall_word': 0.980183, 'acc_grapheme': 0.9815

KeyboardInterrupt: 

In [38]:
#for param in model.backbone.parameters():
#    param.requires_grad = False

In [39]:
#validate(nn.DataParallel(model), val_loader)

In [40]:
train(args)

init model4
model file: ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth, exist: True
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth...
init model4
model file: ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth, exist: True
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth...

val: {'recall': 0.973574, 'recall_grapheme': 0.958259, 'recall_vowel': 0.986701, 'recall_consonant': 0.991078, 'recall_word': 0.946792, 'acc_grapheme': 0.955563, 'acc_vowel': 0.986916, 'acc_consonant': 0.982853, 'acc_word': 0.940609, 'loss_grapheme': 0.505266, 'loss_vowel': 0.242785, 'loss_consonant': 0.176852, 'loss_word': 0.689013}
CYCLE: 1
    0 | 0.000040 | 160640/160716 | 10.7314 | 13.5860 |
val: {'recall': 0.983047, 'recall_grapheme': 0.975177, 'recall_vowel': 0.990518, 'recall_consonant': 0.991315, 'recall_word': 0.974583, 'acc_grapheme': 0.974554, 'acc_vowel': 0.992025, 'acc_consonant': 0.991825, 'acc_word': 0.974355, '

  0%|          | 0/251 [00:00<?, ?it/s]

###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
SWA>>>:


100%|██████████| 251/251 [01:26<00:00,  2.98it/s]



val: {'recall': 0.988702, 'recall_grapheme': 0.983408, 'recall_vowel': 0.994278, 'recall_consonant': 0.993713, 'recall_word': 0.981235, 'acc_grapheme': 0.981981, 'acc_vowel': 0.994467, 'acc_consonant': 0.994367, 'acc_word': 0.981158, 'loss_grapheme': 0.084338, 'loss_vowel': 0.036426, 'loss_consonant': 0.030505, 'loss_word': 0.084398}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   15 | 0.000167 | 160640/160716 | 10.2000 | 12.1626 |
val: {'recall': 0.986645, 'recall_grapheme': 0.980716, 'recall_vowel': 0.991359, 'recall_consonant': 0.993791, 'recall_word': 0.977962, 'acc_grapheme': 0.978517, 'acc_vowel': 0.993595, 'acc_consonant': 0.993545, 'acc_word': 0.97752, 'loss_grapheme': 0.193478, 'loss_vowel': 0.129462, 'loss_consonant': 0.094164, 'loss_word': 0.178724}
   16 | 0.000163 | 160640/160716 | 8.9220 | 11.8400 ||
val: {'recall': 0.98634, 'recall_grapheme': 0.9793, 'recall_vowel': 0.992438, 'recall_consonant': 0.994322, 'recall_word': 0.978441, 'ac

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.987666, 'recall_grapheme': 0.981507, 'recall_vowel': 0.99235, 'recall_consonant': 0.995301, 'recall_word': 0.979606, 'acc_grapheme': 0.979987, 'acc_vowel': 0.993495, 'acc_consonant': 0.993346, 'acc_word': 0.979214, 'loss_grapheme': 0.226765, 'loss_vowel': 0.170005, 'loss_consonant': 0.120342, 'loss_word': 0.211435}
SWA>>>:


100%|██████████| 251/251 [01:25<00:00,  3.07it/s]



val: {'recall': 0.988941, 'recall_grapheme': 0.983948, 'recall_vowel': 0.99339, 'recall_consonant': 0.994479, 'recall_word': 0.981403, 'acc_grapheme': 0.982579, 'acc_vowel': 0.994318, 'acc_consonant': 0.994766, 'acc_word': 0.981308, 'loss_grapheme': 0.08397, 'loss_vowel': 0.037254, 'loss_consonant': 0.030794, 'loss_word': 0.083673}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   18 | 0.000154 | 160640/160716 | 9.3672 | 11.1293 ||
val: {'recall': 0.987044, 'recall_grapheme': 0.980198, 'recall_vowel': 0.99238, 'recall_consonant': 0.9954, 'recall_word': 0.9777, 'acc_grapheme': 0.978292, 'acc_vowel': 0.99342, 'acc_consonant': 0.993794, 'acc_word': 0.97732, 'loss_grapheme': 0.172029, 'loss_vowel': 0.133502, 'loss_consonant': 0.089886, 'loss_word': 0.17217}
   19 | 0.000150 | 160640/160716 | 19.4221 | 11.9090 |
val: {'recall': 0.986956, 'recall_grapheme': 0.980921, 'recall_vowel': 0.992808, 'recall_consonant': 0.993172, 'recall_word': 0.979373, 'acc_grap

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.987042, 'recall_grapheme': 0.981626, 'recall_vowel': 0.992164, 'recall_consonant': 0.992751, 'recall_word': 0.97859, 'acc_grapheme': 0.979364, 'acc_vowel': 0.993919, 'acc_consonant': 0.99357, 'acc_word': 0.978442, 'loss_grapheme': 0.187271, 'loss_vowel': 0.142829, 'loss_consonant': 0.100332, 'loss_word': 0.178249}
SWA>>>:


100%|██████████| 251/251 [01:26<00:00,  3.04it/s]



val: {'recall': 0.989488, 'recall_grapheme': 0.98468, 'recall_vowel': 0.993293, 'recall_consonant': 0.995299, 'recall_word': 0.981651, 'acc_grapheme': 0.983576, 'acc_vowel': 0.994492, 'acc_consonant': 0.995115, 'acc_word': 0.981682, 'loss_grapheme': 0.083424, 'loss_vowel': 0.038647, 'loss_consonant': 0.031227, 'loss_word': 0.083663}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   21 | 0.000141 | 160640/160716 | 11.5016 | 12.0601 |
val: {'recall': 0.987622, 'recall_grapheme': 0.980897, 'recall_vowel': 0.993378, 'recall_consonant': 0.995315, 'recall_word': 0.979716, 'acc_grapheme': 0.979788, 'acc_vowel': 0.993944, 'acc_consonant': 0.993695, 'acc_word': 0.979538, 'loss_grapheme': 0.193942, 'loss_vowel': 0.147146, 'loss_consonant': 0.101736, 'loss_word': 0.166479}
   22 | 0.000136 | 160640/160716 | 8.4757 | 11.1659 ||
val: {'recall': 0.987305, 'recall_grapheme': 0.981354, 'recall_vowel': 0.992829, 'recall_consonant': 0.993681, 'recall_word': 0.979449, 

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.987401, 'recall_grapheme': 0.980943, 'recall_vowel': 0.993101, 'recall_consonant': 0.994615, 'recall_word': 0.978159, 'acc_grapheme': 0.980112, 'acc_vowel': 0.993794, 'acc_consonant': 0.993595, 'acc_word': 0.977769, 'loss_grapheme': 0.20616, 'loss_vowel': 0.157129, 'loss_consonant': 0.104625, 'loss_word': 0.20009}
SWA>>>:


100%|██████████| 251/251 [01:26<00:00,  3.02it/s]



val: {'recall': 0.989533, 'recall_grapheme': 0.984659, 'recall_vowel': 0.993426, 'recall_consonant': 0.995389, 'recall_word': 0.981813, 'acc_grapheme': 0.983676, 'acc_vowel': 0.994567, 'acc_consonant': 0.995165, 'acc_word': 0.981806, 'loss_grapheme': 0.080192, 'loss_vowel': 0.03664, 'loss_consonant': 0.029374, 'loss_word': 0.081544}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   24 | 0.000126 | 160640/160716 | 1.3440 | 11.6876 ||
val: {'recall': 0.987081, 'recall_grapheme': 0.981256, 'recall_vowel': 0.99237, 'recall_consonant': 0.993443, 'recall_word': 0.979072, 'acc_grapheme': 0.979987, 'acc_vowel': 0.993744, 'acc_consonant': 0.994143, 'acc_word': 0.978816, 'loss_grapheme': 0.209714, 'loss_vowel': 0.169065, 'loss_consonant': 0.111517, 'loss_word': 0.185963}
   25 | 0.000121 | 160640/160716 | 5.7105 | 11.2164 ||
val: {'recall': 0.987001, 'recall_grapheme': 0.980176, 'recall_vowel': 0.992903, 'recall_consonant': 0.994749, 'recall_word': 0.978634, '

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.988747, 'recall_grapheme': 0.982562, 'recall_vowel': 0.993919, 'recall_consonant': 0.995946, 'recall_word': 0.981162, 'acc_grapheme': 0.98071, 'acc_vowel': 0.994567, 'acc_consonant': 0.994268, 'acc_word': 0.980884, 'loss_grapheme': 0.202331, 'loss_vowel': 0.155643, 'loss_consonant': 0.110006, 'loss_word': 0.17642}
SWA>>>:


100%|██████████| 251/251 [01:26<00:00,  3.06it/s]



val: {'recall': 0.989769, 'recall_grapheme': 0.984897, 'recall_vowel': 0.993836, 'recall_consonant': 0.995448, 'recall_word': 0.982083, 'acc_grapheme': 0.984025, 'acc_vowel': 0.994667, 'acc_consonant': 0.99524, 'acc_word': 0.982081, 'loss_grapheme': 0.077732, 'loss_vowel': 0.035082, 'loss_consonant': 0.028382, 'loss_word': 0.079198}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   27 | 0.000110 | 160640/160716 | 13.9549 | 11.5440 |
val: {'recall': 0.988423, 'recall_grapheme': 0.982369, 'recall_vowel': 0.993573, 'recall_consonant': 0.995382, 'recall_word': 0.980861, 'acc_grapheme': 0.981507, 'acc_vowel': 0.994268, 'acc_consonant': 0.994193, 'acc_word': 0.980585, 'loss_grapheme': 0.197424, 'loss_vowel': 0.152369, 'loss_consonant': 0.100128, 'loss_word': 0.178919}
   28 | 0.000105 | 160640/160716 | 20.4562 | 11.3414 |
val: {'recall': 0.988235, 'recall_grapheme': 0.98197, 'recall_vowel': 0.993417, 'recall_consonant': 0.995585, 'recall_word': 0.981385, '

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.988566, 'recall_grapheme': 0.98248, 'recall_vowel': 0.993836, 'recall_consonant': 0.995471, 'recall_word': 0.980743, 'acc_grapheme': 0.980785, 'acc_vowel': 0.994492, 'acc_consonant': 0.994268, 'acc_word': 0.980535, 'loss_grapheme': 0.186972, 'loss_vowel': 0.146232, 'loss_consonant': 0.097226, 'loss_word': 0.168711}
SWA>>>:


100%|██████████| 251/251 [01:26<00:00,  3.01it/s]



val: {'recall': 0.989917, 'recall_grapheme': 0.985231, 'recall_vowel': 0.993873, 'recall_consonant': 0.995331, 'recall_word': 0.982335, 'acc_grapheme': 0.984274, 'acc_vowel': 0.994691, 'acc_consonant': 0.995265, 'acc_word': 0.982355, 'loss_grapheme': 0.076335, 'loss_vowel': 0.034215, 'loss_consonant': 0.027626, 'loss_word': 0.078268}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   30 | 0.000095 | 160640/160716 | 3.6900 | 11.2939 ||
val: {'recall': 0.988657, 'recall_grapheme': 0.983051, 'recall_vowel': 0.99339, 'recall_consonant': 0.995139, 'recall_word': 0.980883, 'acc_grapheme': 0.981133, 'acc_vowel': 0.994318, 'acc_consonant': 0.994293, 'acc_word': 0.980685, 'loss_grapheme': 0.142875, 'loss_vowel': 0.104821, 'loss_consonant': 0.072496, 'loss_word': 0.137048}
   31 | 0.000090 | 160640/160716 | 17.6985 | 11.2147 |
val: {'recall': 0.987797, 'recall_grapheme': 0.981575, 'recall_vowel': 0.993086, 'recall_consonant': 0.994951, 'recall_word': 0.979301, 

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.988051, 'recall_grapheme': 0.982093, 'recall_vowel': 0.992837, 'recall_consonant': 0.995181, 'recall_word': 0.979827, 'acc_grapheme': 0.981009, 'acc_vowel': 0.994243, 'acc_consonant': 0.993944, 'acc_word': 0.979613, 'loss_grapheme': 0.220939, 'loss_vowel': 0.18059, 'loss_consonant': 0.120906, 'loss_word': 0.200075}
SWA>>>:


100%|██████████| 251/251 [01:26<00:00,  2.98it/s]



val: {'recall': 0.989954, 'recall_grapheme': 0.985208, 'recall_vowel': 0.994006, 'recall_consonant': 0.995393, 'recall_word': 0.982438, 'acc_grapheme': 0.984299, 'acc_vowel': 0.994816, 'acc_consonant': 0.99529, 'acc_word': 0.982429, 'loss_grapheme': 0.0757, 'loss_vowel': 0.033963, 'loss_consonant': 0.027405, 'loss_word': 0.077937}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   33 | 0.000079 | 160640/160716 | 5.3657 | 11.6975 ||
val: {'recall': 0.988564, 'recall_grapheme': 0.98276, 'recall_vowel': 0.992763, 'recall_consonant': 0.995973, 'recall_word': 0.980879, 'acc_grapheme': 0.981183, 'acc_vowel': 0.993944, 'acc_consonant': 0.994417, 'acc_word': 0.980735, 'loss_grapheme': 0.188327, 'loss_vowel': 0.146538, 'loss_consonant': 0.099563, 'loss_word': 0.165623}
   34 | 0.000074 | 160640/160716 | 6.5991 | 10.9271 ||
val: {'recall': 0.988398, 'recall_grapheme': 0.982649, 'recall_vowel': 0.992656, 'recall_consonant': 0.995638, 'recall_word': 0.980327, 'ac

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.988036, 'recall_grapheme': 0.981939, 'recall_vowel': 0.992927, 'recall_consonant': 0.99534, 'recall_word': 0.98008, 'acc_grapheme': 0.980685, 'acc_vowel': 0.994318, 'acc_consonant': 0.994268, 'acc_word': 0.979838, 'loss_grapheme': 0.208376, 'loss_vowel': 0.168702, 'loss_consonant': 0.11342, 'loss_word': 0.185382}
SWA>>>:


100%|██████████| 251/251 [01:27<00:00,  3.07it/s]



val: {'recall': 0.989919, 'recall_grapheme': 0.985153, 'recall_vowel': 0.993906, 'recall_consonant': 0.995465, 'recall_word': 0.982492, 'acc_grapheme': 0.984299, 'acc_vowel': 0.994791, 'acc_consonant': 0.995364, 'acc_word': 0.982504, 'loss_grapheme': 0.07535, 'loss_vowel': 0.033715, 'loss_consonant': 0.02714, 'loss_word': 0.077525}
   36 | 0.000064 | 160640/160716 | 4.6623 | 11.4520 ||
val: {'recall': 0.988473, 'recall_grapheme': 0.982256, 'recall_vowel': 0.993444, 'recall_consonant': 0.995936, 'recall_word': 0.98095, 'acc_grapheme': 0.980934, 'acc_vowel': 0.994392, 'acc_consonant': 0.994367, 'acc_word': 0.980635, 'loss_grapheme': 0.180546, 'loss_vowel': 0.145638, 'loss_consonant': 0.101986, 'loss_word': 0.158224}
   37 | 0.000059 | 160640/160716 | 4.6603 | 11.2763 ||
val: {'recall': 0.988536, 'recall_grapheme': 0.982693, 'recall_vowel': 0.993071, 'recall_consonant': 0.995688, 'recall_word': 0.981279, 'acc_grapheme': 0.981408, 'acc_vowel': 0.994143, 'acc_consonant': 0.994642, 'acc_wor

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.988748, 'recall_grapheme': 0.982727, 'recall_vowel': 0.993556, 'recall_consonant': 0.995982, 'recall_word': 0.981044, 'acc_grapheme': 0.981781, 'acc_vowel': 0.994417, 'acc_consonant': 0.994816, 'acc_word': 0.980735, 'loss_grapheme': 0.153797, 'loss_vowel': 0.118704, 'loss_consonant': 0.081906, 'loss_word': 0.142304}
SWA>>>:


100%|██████████| 251/251 [01:27<00:00,  2.95it/s]



val: {'recall': 0.990037, 'recall_grapheme': 0.985245, 'recall_vowel': 0.994203, 'recall_consonant': 0.995457, 'recall_word': 0.982489, 'acc_grapheme': 0.984349, 'acc_vowel': 0.994891, 'acc_consonant': 0.995364, 'acc_word': 0.982504, 'loss_grapheme': 0.074544, 'loss_vowel': 0.033431, 'loss_consonant': 0.02678, 'loss_word': 0.077013}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   39 | 0.000050 | 160640/160716 | 15.5516 | 11.1378 |
val: {'recall': 0.989298, 'recall_grapheme': 0.984011, 'recall_vowel': 0.99347, 'recall_consonant': 0.995698, 'recall_word': 0.981245, 'acc_grapheme': 0.982479, 'acc_vowel': 0.994392, 'acc_consonant': 0.994791, 'acc_word': 0.981109, 'loss_grapheme': 0.169957, 'loss_vowel': 0.133094, 'loss_consonant': 0.091798, 'loss_word': 0.155294}
   40 | 0.000046 | 160640/160716 | 18.1405 | 11.0269 |
val: {'recall': 0.988268, 'recall_grapheme': 0.981918, 'recall_vowel': 0.993566, 'recall_consonant': 0.995669, 'recall_word': 0.980974, '

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.98816, 'recall_grapheme': 0.98163, 'recall_vowel': 0.993367, 'recall_consonant': 0.996014, 'recall_word': 0.980551, 'acc_grapheme': 0.980809, 'acc_vowel': 0.994392, 'acc_consonant': 0.994243, 'acc_word': 0.980336, 'loss_grapheme': 0.216032, 'loss_vowel': 0.174316, 'loss_consonant': 0.122033, 'loss_word': 0.184103}
SWA>>>:


100%|██████████| 251/251 [01:25<00:00,  3.04it/s]



val: {'recall': 0.990037, 'recall_grapheme': 0.98529, 'recall_vowel': 0.994094, 'recall_consonant': 0.995473, 'recall_word': 0.982472, 'acc_grapheme': 0.984523, 'acc_vowel': 0.994941, 'acc_consonant': 0.995339, 'acc_word': 0.982504, 'loss_grapheme': 0.073891, 'loss_vowel': 0.033046, 'loss_consonant': 0.026433, 'loss_word': 0.076386}
   42 | 0.000037 | 160640/160716 | 8.3027 | 11.6499 ||
val: {'recall': 0.989279, 'recall_grapheme': 0.983732, 'recall_vowel': 0.994, 'recall_consonant': 0.99565, 'recall_word': 0.981178, 'acc_grapheme': 0.982255, 'acc_vowel': 0.994442, 'acc_consonant': 0.994716, 'acc_word': 0.980884, 'loss_grapheme': 0.147552, 'loss_vowel': 0.112711, 'loss_consonant': 0.078095, 'loss_word': 0.138999}
   43 | 0.000033 | 160640/160716 | 7.7823 | 10.9490 ||
val: {'recall': 0.988428, 'recall_grapheme': 0.982686, 'recall_vowel': 0.993042, 'recall_consonant': 0.995298, 'recall_word': 0.980028, 'acc_grapheme': 0.981482, 'acc_vowel': 0.994093, 'acc_consonant': 0.994567, 'acc_word'

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.989457, 'recall_grapheme': 0.983712, 'recall_vowel': 0.99417, 'recall_consonant': 0.996234, 'recall_word': 0.981621, 'acc_grapheme': 0.982255, 'acc_vowel': 0.994866, 'acc_consonant': 0.994916, 'acc_word': 0.981457, 'loss_grapheme': 0.162869, 'loss_vowel': 0.128537, 'loss_consonant': 0.090099, 'loss_word': 0.14853}
SWA>>>:


100%|██████████| 251/251 [01:27<00:00,  3.11it/s]



val: {'recall': 0.990064, 'recall_grapheme': 0.985282, 'recall_vowel': 0.994185, 'recall_consonant': 0.995507, 'recall_word': 0.982694, 'acc_grapheme': 0.984548, 'acc_vowel': 0.994941, 'acc_consonant': 0.995364, 'acc_word': 0.982704, 'loss_grapheme': 0.073237, 'loss_vowel': 0.03261, 'loss_consonant': 0.026092, 'loss_word': 0.075892}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   45 | 0.000026 | 160640/160716 | 18.1091 | 10.9010 |
val: {'recall': 0.989306, 'recall_grapheme': 0.983393, 'recall_vowel': 0.994337, 'recall_consonant': 0.9961, 'recall_word': 0.980645, 'acc_grapheme': 0.982031, 'acc_vowel': 0.994791, 'acc_consonant': 0.994816, 'acc_word': 0.980436, 'loss_grapheme': 0.160365, 'loss_vowel': 0.12994, 'loss_consonant': 0.089442, 'loss_word': 0.150955}
   46 | 0.000022 | 160640/160716 | 9.4556 | 11.4316 ||
val: {'recall': 0.988405, 'recall_grapheme': 0.982151, 'recall_vowel': 0.993451, 'recall_consonant': 0.995866, 'recall_word': 0.980932, 'ac

  0%|          | 0/251 [00:00<?, ?it/s]


val: {'recall': 0.989321, 'recall_grapheme': 0.983827, 'recall_vowel': 0.993377, 'recall_consonant': 0.996253, 'recall_word': 0.980846, 'acc_grapheme': 0.98223, 'acc_vowel': 0.994691, 'acc_consonant': 0.995015, 'acc_word': 0.980685, 'loss_grapheme': 0.155728, 'loss_vowel': 0.120962, 'loss_consonant': 0.0844, 'loss_word': 0.146366}
SWA>>>:


100%|██████████| 251/251 [01:26<00:00,  3.04it/s]



val: {'recall': 0.990093, 'recall_grapheme': 0.985322, 'recall_vowel': 0.994208, 'recall_consonant': 0.995519, 'recall_word': 0.982676, 'acc_grapheme': 0.984573, 'acc_vowel': 0.994991, 'acc_consonant': 0.995414, 'acc_word': 0.982704, 'loss_grapheme': 0.072534, 'loss_vowel': 0.032184, 'loss_consonant': 0.025704, 'loss_word': 0.075393}
###>>>>> saved ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3_224.pth
   48 | 0.000016 | 160640/160716 | 18.6086 | 11.9723 |
val: {'recall': 0.989311, 'recall_grapheme': 0.98359, 'recall_vowel': 0.994177, 'recall_consonant': 0.995887, 'recall_word': 0.981754, 'acc_grapheme': 0.981831, 'acc_vowel': 0.994841, 'acc_consonant': 0.994891, 'acc_word': 0.981557, 'loss_grapheme': 0.192972, 'loss_vowel': 0.158029, 'loss_consonant': 0.107754, 'loss_word': 0.16852}
   49 | 0.000015 | 054400/160716 | 8.3042 | 12.6752 ||

KeyboardInterrupt: 

In [40]:
train(args)

init model4
model file: ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3.pth, exist: True
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3.pth...
init model4
model file: ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3.pth, exist: True
loading ./model4-ckps/se_resnext50_32x4d/model4_se_resnext50_fold3.pth...

val: {'recall': 0.987867, 'recall_grapheme': 0.981981, 'recall_vowel': 0.993514, 'recall_consonant': 0.993993, 'recall_word': 0.979095, 'acc_grapheme': 0.981009, 'acc_vowel': 0.994492, 'acc_consonant': 0.99362, 'acc_word': 0.97899, 'loss_grapheme': 0.084801, 'loss_vowel': 0.034225, 'loss_consonant': 0.030225, 'loss_word': 0.086658}
CYCLE: 1
    0 | 0.000030 | 160160/160716 | 17.3765 | 12.9856 |
val: {'recall': 0.987116, 'recall_grapheme': 0.98068, 'recall_vowel': 0.992119, 'recall_consonant': 0.994983, 'recall_word': 0.97957, 'acc_grapheme': 0.97909, 'acc_vowel': 0.993744, 'acc_consonant': 0.99367, 'acc_word': 0.979364, 'loss_grapheme': 0.2969

   20 | 0.000109 | 160160/160716 | 12.5166 | 11.4550 |
val: {'recall': 0.987425, 'recall_grapheme': 0.980741, 'recall_vowel': 0.992662, 'recall_consonant': 0.995554, 'recall_word': 0.978316, 'acc_grapheme': 0.979264, 'acc_vowel': 0.994168, 'acc_consonant': 0.993869, 'acc_word': 0.977819, 'loss_grapheme': 0.180255, 'loss_vowel': 0.132443, 'loss_consonant': 0.090824, 'loss_word': 0.165787}
   21 | 0.000106 | 160160/160716 | 6.8766 | 11.3363 ||
val: {'recall': 0.987269, 'recall_grapheme': 0.980304, 'recall_vowel': 0.99329, 'recall_consonant': 0.995176, 'recall_word': 0.97823, 'acc_grapheme': 0.979289, 'acc_vowel': 0.994168, 'acc_consonant': 0.993445, 'acc_word': 0.977943, 'loss_grapheme': 0.169548, 'loss_vowel': 0.111923, 'loss_consonant': 0.080026, 'loss_word': 0.160665}
   22 | 0.000102 | 160160/160716 | 11.1981 | 11.7928 |
val: {'recall': 0.987309, 'recall_grapheme': 0.980512, 'recall_vowel': 0.992872, 'recall_consonant': 0.99534, 'recall_word': 0.979068, 'acc_grapheme': 0.978965, 'acc

   41 | 0.000031 | 160160/160716 | 1.3689 | 11.4202 ||
val: {'recall': 0.988204, 'recall_grapheme': 0.981947, 'recall_vowel': 0.993381, 'recall_consonant': 0.995541, 'recall_word': 0.979776, 'acc_grapheme': 0.980685, 'acc_vowel': 0.994268, 'acc_consonant': 0.994343, 'acc_word': 0.979588, 'loss_grapheme': 0.183105, 'loss_vowel': 0.137544, 'loss_consonant': 0.090824, 'loss_word': 0.165487}
   42 | 0.000028 | 160160/160716 | 9.3395 | 11.8952 ||
val: {'recall': 0.988001, 'recall_grapheme': 0.981648, 'recall_vowel': 0.993227, 'recall_consonant': 0.995481, 'recall_word': 0.97985, 'acc_grapheme': 0.980585, 'acc_vowel': 0.994243, 'acc_consonant': 0.994068, 'acc_word': 0.979638, 'loss_grapheme': 0.220798, 'loss_vowel': 0.170995, 'loss_consonant': 0.112872, 'loss_word': 0.198475}
   43 | 0.000025 | 160160/160716 | 23.0140 | 11.3666 |
val: {'recall': 0.987604, 'recall_grapheme': 0.981609, 'recall_vowel': 0.993538, 'recall_consonant': 0.99366, 'recall_word': 0.979535, 'acc_grapheme': 0.980585, 'ac

    2 | 0.000064 | 022880/160716 | 10.0757 | 10.2515 |

KeyboardInterrupt: 