In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from sklearn.model_selection import StratifiedKFold
from efficientnet_pytorch import EfficientNet
from cvcore.data.auto_augment import RandAugment
from PIL import Image

In [2]:
!pip show cvcore

Name: cvcore
Version: 0.0.1
Summary: Computer Vision Pytorch-based Toolbox
Home-page: UNKNOWN
Author: UNKNOWN
Author-email: UNKNOWN
License: UNKNOWN
Location: /home/chec/bengaliai-cv19
Requires: albumentations, torch, torchvision, timm
Required-by: 


In [3]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [4]:
#!ls /home/chec/data/bengali

In [5]:
DATA_DIR = '/home/chec/data/bengali'

In [6]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [7]:
train_df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic,grapheme
0,Train_0,15,9,5,ক্ট্রো
1,Train_1,159,0,0,হ
2,Train_2,22,3,5,খ্রী
3,Train_3,53,2,2,র্টি
4,Train_4,71,9,5,থ্রো


In [8]:
HEIGHT = 137
WIDTH = 236

In [9]:
import albumentations as albu
'''
def get_train_augs(p=1.):
    return albu.Compose([
        #albu.HorizontalFlip(.5),
        albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=0.5 ),
        albu.Blur(blur_limit=3, p=0.3),
        albu.OpticalDistortion(p=0.3),
        albu.GaussNoise(p=0.3)
        #albu.GridDistortion(p=.33),
        #albu.HueSaturationValue(p=.33) # not for grey scale
    ], p=p)
'''
def get_train_augs():
    return RandAugment(n=2, m=27)

In [10]:
#plt.imshow(x)

In [11]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            #img = augs(image=img)['image']
            img = np.asarray(augs(Image.fromarray(img)))
        
        img = np.expand_dims(img, axis=-1)
        #print('###', img.shape)
        #img = np.concatenate([img, img, img], 2)
        #print('>>>', img.shape)
        
        # taken from https://www.kaggle.com/iafoss/image-preprocessing-128x128
        #MEAN = [ 0.06922848809290576,  0.06922848809290576,  0.06922848809290576]
        #STD = [ 0.20515700083327537,  0.20515700083327537,  0.20515700083327537]
        
        img = transforms.functional.to_tensor(img)
        #img = transforms.functional.normalize(img, mean=MEAN, std=STD)
        
        if self.test_mode:
            return img
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic])

    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, ifold=0, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
    train_df = shuffle(train_df, random_state=1234)
    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    #split_index = int(len(train_df) * 0.9)
    
    #train = train_df.iloc[:split_index]
    #val = train_df.iloc[split_index:]
    
    kf = StratifiedKFold(5, random_state=1234, shuffle=True)
    for i, (train_idx, val_idx) in enumerate(kf.split(train_df, train_df['grapheme_root'].values)):
        if i == ifold:
            #print(val_idx)
            train = train_df.iloc[train_idx]
            val = train_df.iloc[val_idx]
            break
    assert i == ifold
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [12]:
#train_loader, val_loader = get_train_val_loaders(dev_mode=True)

# model

In [13]:
#import pretrainedmodels

In [14]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [15]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [16]:
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained=False).cuda()


In [17]:
#model.features(torch.randn((2, 3, 137, 236)).cuda()).size()

In [18]:
#model.last_linear.in_features

In [19]:
MEAN = [ 0.06922848809290576 ]
STD = [ 0.20515700083327537 ]

class BengaliNet(nn.Module):
    def __init__(self, backbone_name):
        super(BengaliNet, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant
        
        #self.conv0 = nn.Conv2d(1, 3, kernel_size=1, stride=1, padding=0)
        
        if self.backbone_name.startswith('efficient'):
            self.backbone = EfficientNet.from_pretrained(self.backbone_name)
            self.fc = nn.Linear(self.backbone._fc.in_features, self.num_classes)
        else:
            self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained='imagenet')
            self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
        #self.fix_input_layer()
        
    def fix_input_layer(self):
        if self.backbone_name in ['se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnet50', 'senet154', 'se_resnet152', 'nasnetmobile', 'mobilenet', 'nasnetalarge']:
            #self.backbone = eval(backbone_name)()
            #print(self.backbone.layer0.conv1)
            w = self.backbone.layer0.conv1.weight.data
            self.backbone.layer0.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
            #self.backbone.layer0.conv1.weight = torch.nn.Parameter(torch.cat((w, w[:, 2, :, :].unsqueeze(1)), dim=1))
            self.backbone.layer0.conv1.weight = torch.nn.Parameter(w[:, 0, :, :].unsqueeze(1))
        
    def logits(self, x):
        x = self.avg_pool(x)
        #x = F.dropout2d(x, 0.2, self.training)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
    def forward(self, x):
        x = F.interpolate(x, size=(224,224), mode='bilinear', align_corners=False)
        for i in range(len(x)):
            transforms.functional.normalize(x[i], mean=MEAN, std=STD, inplace=True)
        x = torch.cat([x,x,x], 1)
        #x = self.conv0(x)
        #print(x.size())
        if self.backbone_name.startswith('efficient'):
            x = self.backbone.extract_features(x)
        else:
            x = self.backbone.features(x)
        x = self.logits(x)

        return x

In [20]:
MODEL_DIR = './models'
def create_model(args):
    model = BengaliNet(backbone_name=args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if args.predict and (not os.path.exists(model_file)):
        raise AttributeError('model file does not exist: {}'.format(model_file))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [21]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [22]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [23]:
round(1/9, 6)

0.111111

In [24]:
import numpy as np
import sklearn.metrics
import torch


def macro_recall(pred_y, y, n_grapheme=168, n_vowel=11, n_consonant=7):
    pred_y = torch.split(pred_y, [n_grapheme, n_vowel, n_consonant], dim=1)
    pred_labels = [torch.argmax(py, dim=1).cpu().numpy() for py in pred_y]

    #y = y.cpu().numpy()
    # pred_y = [p.cpu().numpy() for p in pred_y]

    recall_grapheme = sklearn.metrics.recall_score(pred_labels[0], y_grapheme, average='macro')
    recall_vowel = sklearn.metrics.recall_score(pred_labels[1], y_vowel, average='macro')
    recall_consonant = sklearn.metrics.recall_score(pred_labels[2], y_consonant, average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_score = np.average(scores, weights=[2, 1, 1])
    # print(f'recall: grapheme {recall_grapheme}, vowel {recall_vowel}, consonant {recall_consonant}, '
    #       f'total {final_score}, y {y.shape}')
    return final_score

def calc_metrics(preds0, preds1, preds2, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    
    
    return metrics

In [25]:
OHEM = False
def ohem_loss(cls_pred, cls_target, rate=0.5):
    batch_size = cls_pred.size(0) 
    ohem_cls_loss = F.cross_entropy(cls_pred, cls_target, reduction='none', ignore_index=-1)

    sorted_ohem_loss, idx = torch.sort(ohem_cls_loss, descending=True)
    keep_num = min(sorted_ohem_loss.size()[0], int(batch_size*rate) )
    if keep_num < sorted_ohem_loss.size()[0]:
        keep_idx_cuda = idx[:keep_num]
        ohem_cls_loss = ohem_cls_loss[keep_idx_cuda]
    cls_loss = ohem_cls_loss.sum() / keep_num
    return cls_loss

In [26]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7], dim=1)
    if OHEM:
        loss0 = ohem_loss(outputs[0], y_true[:, 0])
        loss1 = ohem_loss(outputs[1], y_true[:, 1])
        loss2 = ohem_loss(outputs[2], y_true[:, 2])
    else:
        loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
        loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
        loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    
    return loss0 + loss1 + loss2 #, loss0.item(), loss1.item(), loss2.item()

In [27]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2 = 0., 0., 0.
    preds0, preds1,preds2 = [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs = model(x)
            outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    
    return metrics
            

In [28]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [29]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [30]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [31]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [32]:
np.random.random()

0.6303244743705573

In [33]:
from over9000.over9000 import Over9000
from over9000.radam import RAdam
from over9000.lookahead import Lookahead
def LookaheadSGD(params, alpha=0.5, k=6, *args, **kwargs):
     sgd = optim.SGD(params, *args, **kwargs)
     return Lookahead(sgd, alpha, k)
from gridmask import GridMask

In [34]:
def train_old(args):
    global model

    if args.optim == 'Adam':
        optimizer = optim.Adam( [{'params': model.parameters(), 'initial_lr': args.lr }], lr=args.lr, weight_decay=1e-5)
        #optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
    elif args.optim == 'RAdam':
        optimizer = RAdam(model.parameters(), lr=args.lr)
    elif args.optim == 'Over9000':
        optimizer = Over9000(model.parameters(), lr=args.lr)
    else:
        #optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=1e-5)
        optimizer = LookaheadSGD(
            [{'params': model.parameters(), 'initial_lr': args.lr }],
            lr=args.lr, momentum=0.9, weight_decay=1e-5)

    if args.lrs == 'plateau':
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.t_max, eta_min=args.min_lr) #, last_epoch=args.t_max)
        
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    best_metrics = 0.
    best_key = 'recall'
    
    val_metrics = validate(model, val_loader)
    print(val_metrics)
    best_metrics = val_metrics[best_key]
    
    model.train()
    #optimizer.zero_grad()

    #if args.lrs == 'plateau':
    #    lr_scheduler.step(best_metrics)
    #else:
    #    lr_scheduler.step()
    train_iter = 0
    
    grid = GridMask(64, 128, rotate=15, ratio=0.6, mode=1, prob=1.)

    for epoch in range(args.num_epochs):

        grid.set_prob(epoch, args.st_epochs)

        train_loss = 0

        current_lr = get_lrs(optimizer)
        bg = time.time()
        for batch_idx, (img, targets) in enumerate(train_loader):
            train_iter += 1
            img, targets  = img.cuda(), targets.cuda()
            #do_mixup = False #(np.random.random() < 0.4)
            
            #if do_mixup:
            #    img, targets = mixup(img, targets)
            batch_size = img.size(0)
          
            
            
            #if do_mixup:
            #    loss = mixup_criterion(outputs, targets)
            #else:
            #    loss = criterion(outputs, targets)
            r = np.random.rand()
            #if args.beta > 0 and r < args.cutmix_prob:
            if True:
                # generate mixed sample
                lam = np.random.beta(args.beta, args.beta)
                rand_index = torch.randperm(img.size()[0]).cuda()
                target_a = targets
                target_b = targets[rand_index]
                bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
                img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
                # adjust lambda to exactly match pixel ratio
                lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
                # compute output
                outputs = model(img)
                loss = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            elif r > 0.6: # grid mask
                img = grid(img)
                outputs = model(img)
                loss = criterion(outputs, targets)
            else:
                img, targets = mixup(img, targets)
                outputs = model(img)
                loss = mixup_criterion(outputs, targets)
                #loss = criterion(outputs, targets)
            
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            
            #loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            
            #if batch_idx % 4 == 0:
            #    optimizer.step()
            #    optimizer.zero_grad()

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
                loss.item(), train_loss/(batch_idx+1)), end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                #outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
                #preds0 = (torch.max(outputs[0], dim=1)[1]).cpu().numpy()
                #preds1 = (torch.max(outputs[1], dim=1)[1]).cpu().numpy()
                #preds2 = (torch.max(outputs[2], dim=1)[1]).cpu().numpy()
                #train_metrics = calc_metrics(preds0, preds1, preds2, targets.cpu().numpy())
                #print('train:', train_metrics)
                #save_model(model, model_file+'_latest')
                val_metrics = validate(model, val_loader)
                print('\nval:', val_metrics)
                
                if val_metrics[best_key] > best_metrics:
                    best_metrics = val_metrics[best_key]
                    save_model(model, model_file)
                    print('** saved')
                
                model.train()
                
                if args.lrs == 'plateau':
                    lr_scheduler.step(best_metrics)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
        
    

In [35]:
from cvcore.solver import WarmupCyclicalLR
def make_optimizer(model, base_lr=4e-4, weight_decay=0., weight_decay_bias=0., epsilon=1e-3):
    """
    Create optimizer with per-layer learning rate and weight decay.
    """
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = base_lr
        params += [{"params": [value], "lr": lr, "weight_decay": weight_decay_bias if 'bias' in key else weight_decay}]
    
    optimizer = torch.optim.AdamW(params, lr, eps=epsilon)
    return optimizer

In [48]:
def train(args, model):
    optimizer = make_optimizer(model)
    scheduler = WarmupCyclicalLR(
        "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)
    
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    
    for cycle in range(1, args.num_cycles+1):
        print('CYCLE:', cycle)
        train_cycle(args, model, optimizer, scheduler)
        scheduler = WarmupCyclicalLR(
            "cos", args.base_lr, args.num_epochs, iters_per_epoch=len(train_loader), warmup_epochs=args.warmup_epochs)

def train_cycle(args, model, optimizer, lr_scheduler):
    best_metrics = 0.
    best_key = 'recall'
    
    val_metrics = validate(model, val_loader)
    print(val_metrics)
    best_metrics = val_metrics[best_key]
    
    model.train()
    train_iter = 0

    for epoch in range(args.num_epochs):
        train_loss = 0

        bg = time.time()
        for batch_idx, (img, targets) in enumerate(train_loader):
            train_iter += 1
            img, targets  = img.cuda(), targets.cuda()
            batch_size = img.size(0)
            r = np.random.rand()

            if True:
                # generate mixed sample
                lam = np.random.beta(args.beta, args.beta)
                rand_index = torch.randperm(img.size()[0]).cuda()
                target_a = targets
                target_b = targets[rand_index]
                bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
                img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
                # adjust lambda to exactly match pixel ratio
                lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
                # compute output
                outputs = model(img)
                loss = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            
            optimizer.zero_grad()
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            #loss.backward()
            lr_scheduler(optimizer, batch_idx, epoch)
            optimizer.step()            
            
            current_lr = get_lrs(optimizer)

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
                loss.item(), train_loss/(batch_idx+1)), end='')

        if True:#train_iter > 0 and train_iter % args.iter_val == 0:
            val_metrics = validate(model, val_loader)
            print('\nval:', val_metrics)
                
            if val_metrics[best_key] > best_metrics:
                best_metrics = val_metrics[best_key]
                save_model(model, model_file)
                print('###>>>>> saved')
                
            model.train()

In [45]:
args = Namespace()
args.backbone = 'se_resnext50_32x4d'
args.ckp_name = 'model3_se_resnext50_fold0_mixup_cutmix_224_gridmask.pth'
args.predict = False
args.optim = 'Adam'
args.lr = 1e-4
args.lrs = 'cosine'
args.t_max = 12
args.factor = 0.5
args.patience = 0
args.min_lr = 1e-6
args.iter_val = 200

args.base_lr = 4e-4
args.num_epochs = 150
args.warmup_epochs = 10
args.num_cycles = 100
args.batch_size = 768
args.val_batch_size = 1024
args.st_epochs = 10

args.beta = 1.5
args.cutmix_prob = 0.5

In [38]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size, ifold=0)

(200840, 5)
(200840, 32332)
(160596, 5) (40244, 5)


In [39]:
len(train_loader)

209

In [50]:
model, model_file = create_model(args)
#if torch.cuda.device_count() > 1:
#    model = nn.DataParallel(model)
model = model.cuda()


model file: ./models/se_resnext50_32x4d/model3_se_resnext50_fold0_mixup_cutmix_224_gridmask.pth, exist: True
loading ./models/se_resnext50_32x4d/model3_se_resnext50_fold0_mixup_cutmix_224_gridmask.pth...


In [None]:
train(args, model)

CYCLE: 1
{'recall': 0.997662, 'recall_grapheme': 0.997015, 'recall_vowel': 0.998562, 'recall_consonant': 0.998055, 'acc_grapheme': 0.996397, 'acc_vowel': 0.998484, 'acc_consonant': 0.998758, 'loss_grapheme': 0.020853, 'loss_vowel': 0.012834, 'loss_consonant': 0.009888}
    0 | 0.000040 | 160512/160596 | 3.7143 | 4.5922 |
val: {'recall': 0.991667, 'recall_grapheme': 0.988034, 'recall_vowel': 0.994864, 'recall_consonant': 0.995736, 'acc_grapheme': 0.987949, 'acc_vowel': 0.995701, 'acc_consonant': 0.994931, 'loss_grapheme': 0.162769, 'loss_vowel': 0.131382, 'loss_consonant': 0.088483}
    1 | 0.000080 | 160512/160596 | 1.3471 | 3.8837 |
val: {'recall': 0.991355, 'recall_grapheme': 0.987691, 'recall_vowel': 0.99516, 'recall_consonant': 0.99488, 'acc_grapheme': 0.988421, 'acc_vowel': 0.996024, 'acc_consonant': 0.995701, 'loss_grapheme': 0.115625, 'loss_vowel': 0.102441, 'loss_consonant': 0.065003}
    2 | 0.000120 | 160512/160596 | 4.2119 | 3.5793 |
val: {'recall': 0.991888, 'recall_graphem

   25 | 0.000371 | 160512/160596 | 2.8086 | 2.9233 |
val: {'recall': 0.994253, 'recall_grapheme': 0.992119, 'recall_vowel': 0.996172, 'recall_consonant': 0.996601, 'acc_grapheme': 0.992645, 'acc_vowel': 0.996472, 'acc_consonant': 0.996372, 'loss_grapheme': 0.182938, 'loss_vowel': 0.151994, 'loss_consonant': 0.105618}
   26 | 0.000369 | 160512/160596 | 2.1511 | 2.9727 |
val: {'recall': 0.994593, 'recall_grapheme': 0.992691, 'recall_vowel': 0.996725, 'recall_consonant': 0.996266, 'acc_grapheme': 0.992893, 'acc_vowel': 0.996521, 'acc_consonant': 0.99677, 'loss_grapheme': 0.147412, 'loss_vowel': 0.127952, 'loss_consonant': 0.08814}
   27 | 0.000367 | 160512/160596 | 3.2896 | 2.8683 |
val: {'recall': 0.993786, 'recall_grapheme': 0.991826, 'recall_vowel': 0.996218, 'recall_consonant': 0.995275, 'acc_grapheme': 0.991949, 'acc_vowel': 0.996596, 'acc_consonant': 0.99677, 'loss_grapheme': 0.18575, 'loss_vowel': 0.166305, 'loss_consonant': 0.115988}
   28 | 0.000364 | 160512/160596 | 2.2451 | 2.9

   51 | 0.000293 | 160512/160596 | 0.4082 | 2.7416 |
val: {'recall': 0.995864, 'recall_grapheme': 0.994278, 'recall_vowel': 0.997531, 'recall_consonant': 0.997367, 'acc_grapheme': 0.994036, 'acc_vowel': 0.99759, 'acc_consonant': 0.997292, 'loss_grapheme': 0.138626, 'loss_vowel': 0.116432, 'loss_consonant': 0.079644}
   52 | 0.000289 | 160512/160596 | 2.5249 | 2.6724 |
val: {'recall': 0.994808, 'recall_grapheme': 0.993157, 'recall_vowel': 0.9971, 'recall_consonant': 0.995817, 'acc_grapheme': 0.992819, 'acc_vowel': 0.997118, 'acc_consonant': 0.997366, 'loss_grapheme': 0.125815, 'loss_vowel': 0.125438, 'loss_consonant': 0.081933}
   53 | 0.000285 | 160512/160596 | 0.6594 | 2.8468 |
val: {'recall': 0.993904, 'recall_grapheme': 0.993003, 'recall_vowel': 0.996837, 'recall_consonant': 0.992774, 'acc_grapheme': 0.993465, 'acc_vowel': 0.997242, 'acc_consonant': 0.997217, 'loss_grapheme': 0.146773, 'loss_vowel': 0.130469, 'loss_consonant': 0.090217}
   54 | 0.000281 | 160512/160596 | 3.1429 | 2.

   77 | 0.000187 | 160512/160596 | 3.4715 | 2.7270 |
val: {'recall': 0.994451, 'recall_grapheme': 0.992321, 'recall_vowel': 0.996411, 'recall_consonant': 0.996752, 'acc_grapheme': 0.992595, 'acc_vowel': 0.997043, 'acc_consonant': 0.997068, 'loss_grapheme': 0.10043, 'loss_vowel': 0.083092, 'loss_consonant': 0.068987}
   78 | 0.000183 | 160512/160596 | 2.7104 | 2.5906 |
val: {'recall': 0.993042, 'recall_grapheme': 0.990616, 'recall_vowel': 0.99526, 'recall_consonant': 0.995675, 'acc_grapheme': 0.991552, 'acc_vowel': 0.996621, 'acc_consonant': 0.996273, 'loss_grapheme': 0.075243, 'loss_vowel': 0.0668, 'loss_consonant': 0.054329}
   79 | 0.000179 | 160512/160596 | 3.0923 | 2.6513 |
val: {'recall': 0.995421, 'recall_grapheme': 0.993855, 'recall_vowel': 0.996793, 'recall_consonant': 0.997182, 'acc_grapheme': 0.993987, 'acc_vowel': 0.997565, 'acc_consonant': 0.997913, 'loss_grapheme': 0.099417, 'loss_vowel': 0.084683, 'loss_consonant': 0.062864}
   80 | 0.000175 | 160512/160596 | 3.1681 | 2.5

  103 | 0.000086 | 160512/160596 | 3.4126 | 2.5433 |
val: {'recall': 0.995634, 'recall_grapheme': 0.994052, 'recall_vowel': 0.997288, 'recall_consonant': 0.997144, 'acc_grapheme': 0.994384, 'acc_vowel': 0.997888, 'acc_consonant': 0.997962, 'loss_grapheme': 0.092098, 'loss_vowel': 0.074394, 'loss_consonant': 0.058841}
  104 | 0.000082 | 160512/160596 | 2.5624 | 2.6192 |
val: {'recall': 0.994972, 'recall_grapheme': 0.993398, 'recall_vowel': 0.996869, 'recall_consonant': 0.996222, 'acc_grapheme': 0.99339, 'acc_vowel': 0.997515, 'acc_consonant': 0.997391, 'loss_grapheme': 0.072817, 'loss_vowel': 0.05912, 'loss_consonant': 0.051929}
  105 | 0.000079 | 160512/160596 | 2.6586 | 2.6816 |
val: {'recall': 0.995759, 'recall_grapheme': 0.99426, 'recall_vowel': 0.99754, 'recall_consonant': 0.996975, 'acc_grapheme': 0.994782, 'acc_vowel': 0.998211, 'acc_consonant': 0.998211, 'loss_grapheme': 0.093794, 'loss_vowel': 0.076679, 'loss_consonant': 0.056322}
  106 | 0.000076 | 160512/160596 | 3.1646 | 2.6

  129 | 0.000017 | 160512/160596 | 2.8139 | 2.5360 |
val: {'recall': 0.99589, 'recall_grapheme': 0.994318, 'recall_vowel': 0.997794, 'recall_consonant': 0.997128, 'acc_grapheme': 0.994633, 'acc_vowel': 0.998087, 'acc_consonant': 0.998012, 'loss_grapheme': 0.084292, 'loss_vowel': 0.070467, 'loss_consonant': 0.054091}
  130 | 0.000016 | 160512/160596 | 2.5479 | 2.6107 |
val: {'recall': 0.995545, 'recall_grapheme': 0.99391, 'recall_vowel': 0.997492, 'recall_consonant': 0.996867, 'acc_grapheme': 0.994409, 'acc_vowel': 0.998112, 'acc_consonant': 0.998161, 'loss_grapheme': 0.08089, 'loss_vowel': 0.069069, 'loss_consonant': 0.051299}
  131 | 0.000014 | 160512/160596 | 0.6234 | 2.5317 |
val: {'recall': 0.995621, 'recall_grapheme': 0.99387, 'recall_vowel': 0.997379, 'recall_consonant': 0.997367, 'acc_grapheme': 0.994434, 'acc_vowel': 0.997938, 'acc_consonant': 0.998062, 'loss_grapheme': 0.083018, 'loss_vowel': 0.068858, 'loss_consonant': 0.051825}
  132 | 0.000013 | 160512/160596 | 3.1110 | 2.5

    4 | 0.000199 | 160512/160596 | 3.2430 | 2.6409 |
val: {'recall': 0.994975, 'recall_grapheme': 0.992926, 'recall_vowel': 0.997233, 'recall_consonant': 0.996815, 'acc_grapheme': 0.993564, 'acc_vowel': 0.997863, 'acc_consonant': 0.997813, 'loss_grapheme': 0.097662, 'loss_vowel': 0.077484, 'loss_consonant': 0.065074}
    5 | 0.000239 | 160512/160596 | 3.4410 | 2.6784 |
val: {'recall': 0.9944, 'recall_grapheme': 0.992229, 'recall_vowel': 0.996443, 'recall_consonant': 0.996701, 'acc_grapheme': 0.992918, 'acc_vowel': 0.997093, 'acc_consonant': 0.997465, 'loss_grapheme': 0.102694, 'loss_vowel': 0.078459, 'loss_consonant': 0.060353}
    6 | 0.000278 | 160512/160596 | 3.3723 | 2.6678 |
val: {'recall': 0.993575, 'recall_grapheme': 0.991454, 'recall_vowel': 0.995752, 'recall_consonant': 0.995637, 'acc_grapheme': 0.991825, 'acc_vowel': 0.996944, 'acc_consonant': 0.99672, 'loss_grapheme': 0.079037, 'loss_vowel': 0.064861, 'loss_consonant': 0.055288}
    7 | 0.000318 | 160512/160596 | 1.9315 | 2.

   30 | 0.000359 | 160512/160596 | 3.2886 | 2.6877 |
val: {'recall': 0.99443, 'recall_grapheme': 0.992132, 'recall_vowel': 0.996809, 'recall_consonant': 0.996646, 'acc_grapheme': 0.993018, 'acc_vowel': 0.997316, 'acc_consonant': 0.997615, 'loss_grapheme': 0.109324, 'loss_vowel': 0.091417, 'loss_consonant': 0.068255}
   31 | 0.000357 | 160512/160596 | 1.2234 | 2.7205 |
val: {'recall': 0.993914, 'recall_grapheme': 0.992395, 'recall_vowel': 0.995682, 'recall_consonant': 0.995182, 'acc_grapheme': 0.992198, 'acc_vowel': 0.996571, 'acc_consonant': 0.996968, 'loss_grapheme': 0.092065, 'loss_vowel': 0.072639, 'loss_consonant': 0.059731}
   32 | 0.000354 | 160512/160596 | 2.7971 | 2.6335 |
val: {'recall': 0.994305, 'recall_grapheme': 0.99225, 'recall_vowel': 0.995968, 'recall_consonant': 0.99675, 'acc_grapheme': 0.992098, 'acc_vowel': 0.997093, 'acc_consonant': 0.997366, 'loss_grapheme': 0.0896, 'loss_vowel': 0.068984, 'loss_consonant': 0.054972}
   33 | 0.000351 | 160512/160596 | 3.6367 | 2.68

   57 | 0.000270 | 160512/160596 | 2.3736 | 2.6065 |
val: {'recall': 0.993649, 'recall_grapheme': 0.993496, 'recall_vowel': 0.996746, 'recall_consonant': 0.990858, 'acc_grapheme': 0.99339, 'acc_vowel': 0.997441, 'acc_consonant': 0.99754, 'loss_grapheme': 0.097688, 'loss_vowel': 0.078026, 'loss_consonant': 0.05184}
   58 | 0.000266 | 160512/160596 | 3.0683 | 2.5878 |
val: {'recall': 0.994694, 'recall_grapheme': 0.993479, 'recall_vowel': 0.996953, 'recall_consonant': 0.994864, 'acc_grapheme': 0.993639, 'acc_vowel': 0.997118, 'acc_consonant': 0.99754, 'loss_grapheme': 0.077272, 'loss_vowel': 0.065965, 'loss_consonant': 0.04952}
   59 | 0.000262 | 160512/160596 | 2.4986 | 2.5519 |
val: {'recall': 0.995456, 'recall_grapheme': 0.994173, 'recall_vowel': 0.996999, 'recall_consonant': 0.996477, 'acc_grapheme': 0.994409, 'acc_vowel': 0.997938, 'acc_consonant': 0.997938, 'loss_grapheme': 0.108539, 'loss_vowel': 0.089925, 'loss_consonant': 0.059481}
   60 | 0.000258 | 160512/160596 | 2.7462 | 2.50

   83 | 0.000163 | 160512/160596 | 2.3558 | 2.5043 |
val: {'recall': 0.99629, 'recall_grapheme': 0.995616, 'recall_vowel': 0.997859, 'recall_consonant': 0.99607, 'acc_grapheme': 0.995204, 'acc_vowel': 0.998211, 'acc_consonant': 0.99841, 'loss_grapheme': 0.044234, 'loss_vowel': 0.031369, 'loss_consonant': 0.023754}
###>>>>> saved
   84 | 0.000158 | 160512/160596 | 3.2219 | 2.5181 |
val: {'recall': 0.996175, 'recall_grapheme': 0.995697, 'recall_vowel': 0.997772, 'recall_consonant': 0.995535, 'acc_grapheme': 0.995378, 'acc_vowel': 0.998285, 'acc_consonant': 0.998186, 'loss_grapheme': 0.054106, 'loss_vowel': 0.040695, 'loss_consonant': 0.030917}
   85 | 0.000154 | 160512/160596 | 3.1306 | 2.5137 |
val: {'recall': 0.994964, 'recall_grapheme': 0.993673, 'recall_vowel': 0.996885, 'recall_consonant': 0.995626, 'acc_grapheme': 0.993018, 'acc_vowel': 0.997018, 'acc_consonant': 0.997391, 'loss_grapheme': 0.066877, 'loss_vowel': 0.055273, 'loss_consonant': 0.040667}
   86 | 0.000150 | 160512/16059

  109 | 0.000066 | 160512/160596 | 2.7923 | 2.4014 |
val: {'recall': 0.995748, 'recall_grapheme': 0.994853, 'recall_vowel': 0.997682, 'recall_consonant': 0.995606, 'acc_grapheme': 0.994832, 'acc_vowel': 0.998161, 'acc_consonant': 0.998087, 'loss_grapheme': 0.070898, 'loss_vowel': 0.061281, 'loss_consonant': 0.044986}
  110 | 0.000063 | 160512/160596 | 2.6656 | 2.4247 |
val: {'recall': 0.995728, 'recall_grapheme': 0.994878, 'recall_vowel': 0.997468, 'recall_consonant': 0.995686, 'acc_grapheme': 0.995055, 'acc_vowel': 0.998136, 'acc_consonant': 0.998236, 'loss_grapheme': 0.059392, 'loss_vowel': 0.050205, 'loss_consonant': 0.038054}
  111 | 0.000060 | 160512/160596 | 2.3770 | 2.4614 |
val: {'recall': 0.996089, 'recall_grapheme': 0.995381, 'recall_vowel': 0.997904, 'recall_consonant': 0.995689, 'acc_grapheme': 0.995328, 'acc_vowel': 0.99836, 'acc_consonant': 0.998261, 'loss_grapheme': 0.057588, 'loss_vowel': 0.045666, 'loss_consonant': 0.034375}
  112 | 0.000057 | 160512/160596 | 1.7868 | 

  135 | 0.000009 | 160512/160596 | 2.9291 | 2.3746 |
val: {'recall': 0.995782, 'recall_grapheme': 0.994889, 'recall_vowel': 0.997947, 'recall_consonant': 0.995405, 'acc_grapheme': 0.99508, 'acc_vowel': 0.99831, 'acc_consonant': 0.998037, 'loss_grapheme': 0.052319, 'loss_vowel': 0.041142, 'loss_consonant': 0.031809}
  136 | 0.000007 | 160512/160596 | 0.4854 | 2.4913 |
val: {'recall': 0.996287, 'recall_grapheme': 0.995737, 'recall_vowel': 0.998156, 'recall_consonant': 0.995517, 'acc_grapheme': 0.995751, 'acc_vowel': 0.998459, 'acc_consonant': 0.998285, 'loss_grapheme': 0.062107, 'loss_vowel': 0.049165, 'loss_consonant': 0.036187}
  137 | 0.000006 | 160512/160596 | 2.4149 | 2.4376 |
val: {'recall': 0.995406, 'recall_grapheme': 0.994094, 'recall_vowel': 0.997632, 'recall_consonant': 0.995806, 'acc_grapheme': 0.994185, 'acc_vowel': 0.997938, 'acc_consonant': 0.997764, 'loss_grapheme': 0.042492, 'loss_vowel': 0.031478, 'loss_consonant': 0.025419}
  138 | 0.000005 | 160512/160596 | 1.7052 | 2

   10 | 0.000395 | 160512/160596 | 1.3006 | 2.5859 |
val: {'recall': 0.994473, 'recall_grapheme': 0.993137, 'recall_vowel': 0.997218, 'recall_consonant': 0.994399, 'acc_grapheme': 0.993266, 'acc_vowel': 0.997465, 'acc_consonant': 0.997292, 'loss_grapheme': 0.073468, 'loss_vowel': 0.045225, 'loss_consonant': 0.034475}
   11 | 0.000394 | 160512/160596 | 2.8290 | 2.5688 |
val: {'recall': 0.995097, 'recall_grapheme': 0.994265, 'recall_vowel': 0.997337, 'recall_consonant': 0.994521, 'acc_grapheme': 0.994086, 'acc_vowel': 0.997863, 'acc_consonant': 0.997341, 'loss_grapheme': 0.048957, 'loss_vowel': 0.036477, 'loss_consonant': 0.030719}
   12 | 0.000393 | 160512/160596 | 3.1117 | 2.5254 |
val: {'recall': 0.994089, 'recall_grapheme': 0.992693, 'recall_vowel': 0.996719, 'recall_consonant': 0.994253, 'acc_grapheme': 0.992893, 'acc_vowel': 0.996968, 'acc_consonant': 0.996944, 'loss_grapheme': 0.070029, 'loss_vowel': 0.052624, 'loss_consonant': 0.041337}
   13 | 0.000391 | 160512/160596 | 2.8474 |

In [51]:
train(args)

{'recall': 0.997662, 'recall_grapheme': 0.997015, 'recall_vowel': 0.998562, 'recall_consonant': 0.998055, 'acc_grapheme': 0.996397, 'acc_vowel': 0.998484, 'acc_consonant': 0.998758, 'loss_grapheme': 0.020853, 'loss_vowel': 0.012834, 'loss_consonant': 0.009888}
    0 | 0.000001 | 153600/160596 | 1.4446 | 0.9656 |
val: {'recall': 0.99721, 'recall_grapheme': 0.996408, 'recall_vowel': 0.998129, 'recall_consonant': 0.997894, 'acc_grapheme': 0.995751, 'acc_vowel': 0.998261, 'acc_consonant': 0.998559, 'loss_grapheme': 0.035584, 'loss_vowel': 0.026779, 'loss_consonant': 0.018523}
    1 | 0.000003 | 146688/160596 | 0.0003 | 0.9426 |
val: {'recall': 0.996978, 'recall_grapheme': 0.996377, 'recall_vowel': 0.998253, 'recall_consonant': 0.996908, 'acc_grapheme': 0.996024, 'acc_vowel': 0.99841, 'acc_consonant': 0.998758, 'loss_grapheme': 0.02673, 'loss_vowel': 0.017341, 'loss_consonant': 0.011721}
    2 | 0.000008 | 139776/160596 | 2.2016 | 1.0837 |
val: {'recall': 0.996654, 'recall_grapheme': 0.9954

   24 | 0.000003 | 141312/160596 | 0.0016 | 0.8818 |
val: {'recall': 0.996344, 'recall_grapheme': 0.995153, 'recall_vowel': 0.997321, 'recall_consonant': 0.997749, 'acc_grapheme': 0.994682, 'acc_vowel': 0.997888, 'acc_consonant': 0.998112, 'loss_grapheme': 0.034706, 'loss_vowel': 0.021441, 'loss_consonant': 0.014329}
   25 | 0.000008 | 134400/160596 | 2.5486 | 0.8915 |
val: {'recall': 0.995837, 'recall_grapheme': 0.994888, 'recall_vowel': 0.99658, 'recall_consonant': 0.996992, 'acc_grapheme': 0.994086, 'acc_vowel': 0.997813, 'acc_consonant': 0.997788, 'loss_grapheme': 0.031059, 'loss_vowel': 0.01747, 'loss_consonant': 0.013197}
   26 | 0.000015 | 127488/160596 | 0.0078 | 0.9009 |
val: {'recall': 0.996001, 'recall_grapheme': 0.994741, 'recall_vowel': 0.997254, 'recall_consonant': 0.997269, 'acc_grapheme': 0.994459, 'acc_vowel': 0.997962, 'acc_consonant': 0.998037, 'loss_grapheme': 0.026482, 'loss_vowel': 0.012969, 'loss_consonant': 0.010198}
   27 | 0.000026 | 120576/160596 | 0.4286 | 0

   49 | 0.000015 | 122112/160596 | 0.0052 | 0.8903 |
val: {'recall': 0.996326, 'recall_grapheme': 0.99521, 'recall_vowel': 0.997815, 'recall_consonant': 0.99707, 'acc_grapheme': 0.994956, 'acc_vowel': 0.998112, 'acc_consonant': 0.998136, 'loss_grapheme': 0.024882, 'loss_vowel': 0.011913, 'loss_consonant': 0.00914}
   50 | 0.000026 | 115200/160596 | 1.7909 | 0.8895 |
val: {'recall': 0.996179, 'recall_grapheme': 0.994923, 'recall_vowel': 0.997317, 'recall_consonant': 0.997551, 'acc_grapheme': 0.994633, 'acc_vowel': 0.998037, 'acc_consonant': 0.998136, 'loss_grapheme': 0.03195, 'loss_vowel': 0.017314, 'loss_consonant': 0.012171}
   51 | 0.000038 | 108288/160596 | 2.9374 | 1.1017 |
val: {'recall': 0.99492, 'recall_grapheme': 0.99313, 'recall_vowel': 0.996908, 'recall_consonant': 0.996512, 'acc_grapheme': 0.993142, 'acc_vowel': 0.997615, 'acc_consonant': 0.997416, 'loss_grapheme': 0.042615, 'loss_vowel': 0.029905, 'loss_consonant': 0.022156}
   52 | 0.000051 | 101376/160596 | 0.0255 | 1.024

   74 | 0.000038 | 102912/160596 | 0.3834 | 0.9381 |
val: {'recall': 0.996322, 'recall_grapheme': 0.994912, 'recall_vowel': 0.997788, 'recall_consonant': 0.997677, 'acc_grapheme': 0.994707, 'acc_vowel': 0.998112, 'acc_consonant': 0.998012, 'loss_grapheme': 0.034844, 'loss_vowel': 0.019049, 'loss_consonant': 0.012451}
   75 | 0.000051 | 096000/160596 | 1.0288 | 1.1886 |
val: {'recall': 0.995866, 'recall_grapheme': 0.994131, 'recall_vowel': 0.997229, 'recall_consonant': 0.997973, 'acc_grapheme': 0.993763, 'acc_vowel': 0.997664, 'acc_consonant': 0.997714, 'loss_grapheme': 0.036802, 'loss_vowel': 0.021659, 'loss_consonant': 0.01456}
   76 | 0.000063 | 089088/160596 | 1.6046 | 1.0469 |
val: {'recall': 0.995025, 'recall_grapheme': 0.993019, 'recall_vowel': 0.997337, 'recall_consonant': 0.996724, 'acc_grapheme': 0.993614, 'acc_vowel': 0.997813, 'acc_consonant': 0.997714, 'loss_grapheme': 0.057468, 'loss_vowel': 0.037118, 'loss_consonant': 0.023561}
   77 | 0.000075 | 082176/160596 | 1.3180 | 

   99 | 0.000063 | 083712/160596 | 0.8658 | 0.8452 |
val: {'recall': 0.995913, 'recall_grapheme': 0.994951, 'recall_vowel': 0.996933, 'recall_consonant': 0.996816, 'acc_grapheme': 0.994707, 'acc_vowel': 0.998062, 'acc_consonant': 0.997962, 'loss_grapheme': 0.02491, 'loss_vowel': 0.010231, 'loss_consonant': 0.00909}
  100 | 0.000075 | 076800/160596 | 0.0090 | 1.0329 |
val: {'recall': 0.995468, 'recall_grapheme': 0.993822, 'recall_vowel': 0.99734, 'recall_consonant': 0.996886, 'acc_grapheme': 0.993912, 'acc_vowel': 0.997788, 'acc_consonant': 0.997764, 'loss_grapheme': 0.038115, 'loss_vowel': 0.023456, 'loss_consonant': 0.016523}
  101 | 0.000086 | 069888/160596 | 0.0022 | 0.9838 |
val: {'recall': 0.995076, 'recall_grapheme': 0.993599, 'recall_vowel': 0.996583, 'recall_consonant': 0.996525, 'acc_grapheme': 0.994061, 'acc_vowel': 0.997441, 'acc_consonant': 0.997664, 'loss_grapheme': 0.041703, 'loss_vowel': 0.023968, 'loss_consonant': 0.01549}
  102 | 0.000093 | 062976/160596 | 2.5671 | 1.2

KeyboardInterrupt: 

In [49]:
del model

In [63]:
train(args)

{'recall': 0.996988, 'recall_grapheme': 0.996238, 'recall_vowel': 0.99774, 'recall_consonant': 0.997735, 'acc_grapheme': 0.995254, 'acc_vowel': 0.998161, 'acc_consonant': 0.998136, 'loss_grapheme': 0.022178, 'loss_vowel': 0.010151, 'loss_consonant': 0.008258}
    0 | 0.000001 | 153600/160596 | 0.0004 | 1.0279 |
val: {'recall': 0.996261, 'recall_grapheme': 0.995071, 'recall_vowel': 0.997409, 'recall_consonant': 0.997492, 'acc_grapheme': 0.994036, 'acc_vowel': 0.997863, 'acc_consonant': 0.998037, 'loss_grapheme': 0.055595, 'loss_vowel': 0.033633, 'loss_consonant': 0.020962}
    1 | 0.000003 | 146688/160596 | 2.1166 | 1.0477 |
val: {'recall': 0.996363, 'recall_grapheme': 0.995294, 'recall_vowel': 0.997246, 'recall_consonant': 0.997619, 'acc_grapheme': 0.994161, 'acc_vowel': 0.997888, 'acc_consonant': 0.998012, 'loss_grapheme': 0.042929, 'loss_vowel': 0.027253, 'loss_consonant': 0.017694}
    2 | 0.000008 | 139776/160596 | 1.2467 | 0.8781 |
val: {'recall': 0.996636, 'recall_grapheme': 0.99

   24 | 0.000003 | 141312/160596 | 2.0666 | 0.9990 |
val: {'recall': 0.996368, 'recall_grapheme': 0.995275, 'recall_vowel': 0.997307, 'recall_consonant': 0.997614, 'acc_grapheme': 0.995155, 'acc_vowel': 0.997739, 'acc_consonant': 0.998136, 'loss_grapheme': 0.052939, 'loss_vowel': 0.031463, 'loss_consonant': 0.020814}
   25 | 0.000008 | 134400/160596 | 0.0072 | 1.0217 |
val: {'recall': 0.997029, 'recall_grapheme': 0.996288, 'recall_vowel': 0.99753, 'recall_consonant': 0.99801, 'acc_grapheme': 0.995652, 'acc_vowel': 0.997938, 'acc_consonant': 0.998385, 'loss_grapheme': 0.023453, 'loss_vowel': 0.012796, 'loss_consonant': 0.009681}
** saved
   26 | 0.000015 | 127488/160596 | 0.0093 | 0.9958 |
val: {'recall': 0.99685, 'recall_grapheme': 0.995877, 'recall_vowel': 0.997737, 'recall_consonant': 0.997908, 'acc_grapheme': 0.995478, 'acc_vowel': 0.998062, 'acc_consonant': 0.99836, 'loss_grapheme': 0.024279, 'loss_vowel': 0.012967, 'loss_consonant': 0.009594}
   27 | 0.000026 | 120576/160596 | 2.4

   49 | 0.000015 | 122112/160596 | 2.3226 | 0.9520 |
val: {'recall': 0.996397, 'recall_grapheme': 0.99525, 'recall_vowel': 0.997336, 'recall_consonant': 0.997752, 'acc_grapheme': 0.994782, 'acc_vowel': 0.997938, 'acc_consonant': 0.998136, 'loss_grapheme': 0.025054, 'loss_vowel': 0.012843, 'loss_consonant': 0.00948}
   50 | 0.000026 | 115200/160596 | 0.8201 | 0.9728 |
val: {'recall': 0.996282, 'recall_grapheme': 0.99503, 'recall_vowel': 0.997345, 'recall_consonant': 0.997724, 'acc_grapheme': 0.994633, 'acc_vowel': 0.997838, 'acc_consonant': 0.998037, 'loss_grapheme': 0.033476, 'loss_vowel': 0.019314, 'loss_consonant': 0.013521}
   51 | 0.000038 | 108288/160596 | 0.0026 | 1.0012 |
val: {'recall': 0.995873, 'recall_grapheme': 0.994213, 'recall_vowel': 0.997306, 'recall_consonant': 0.997761, 'acc_grapheme': 0.994036, 'acc_vowel': 0.997913, 'acc_consonant': 0.998062, 'loss_grapheme': 0.03212, 'loss_vowel': 0.018114, 'loss_consonant': 0.012488}
   52 | 0.000051 | 101376/160596 | 1.5500 | 0.9

   74 | 0.000038 | 102912/160596 | 1.2997 | 0.9296 |
val: {'recall': 0.996029, 'recall_grapheme': 0.994687, 'recall_vowel': 0.997083, 'recall_consonant': 0.997658, 'acc_grapheme': 0.994086, 'acc_vowel': 0.997863, 'acc_consonant': 0.997913, 'loss_grapheme': 0.029208, 'loss_vowel': 0.014379, 'loss_consonant': 0.011922}
   75 | 0.000051 | 096000/160596 | 0.0292 | 0.9989 |
val: {'recall': 0.995316, 'recall_grapheme': 0.993807, 'recall_vowel': 0.996855, 'recall_consonant': 0.996795, 'acc_grapheme': 0.993589, 'acc_vowel': 0.997565, 'acc_consonant': 0.99749, 'loss_grapheme': 0.034512, 'loss_vowel': 0.018202, 'loss_consonant': 0.014359}
   76 | 0.000063 | 089088/160596 | 1.7036 | 0.9700 |
val: {'recall': 0.994697, 'recall_grapheme': 0.992385, 'recall_vowel': 0.997112, 'recall_consonant': 0.996907, 'acc_grapheme': 0.992769, 'acc_vowel': 0.99759, 'acc_consonant': 0.997441, 'loss_grapheme': 0.043566, 'loss_vowel': 0.027106, 'loss_consonant': 0.017781}
   77 | 0.000075 | 082176/160596 | 0.0129 | 0

   99 | 0.000063 | 083712/160596 | 0.0032 | 0.9283 |
val: {'recall': 0.995798, 'recall_grapheme': 0.993975, 'recall_vowel': 0.997676, 'recall_consonant': 0.997567, 'acc_grapheme': 0.993887, 'acc_vowel': 0.997938, 'acc_consonant': 0.998037, 'loss_grapheme': 0.03879, 'loss_vowel': 0.021083, 'loss_consonant': 0.013372}
  100 | 0.000075 | 076800/160596 | 0.0013 | 1.0293 |
val: {'recall': 0.994072, 'recall_grapheme': 0.991287, 'recall_vowel': 0.997061, 'recall_consonant': 0.996655, 'acc_grapheme': 0.991999, 'acc_vowel': 0.997068, 'acc_consonant': 0.997465, 'loss_grapheme': 0.068855, 'loss_vowel': 0.047148, 'loss_consonant': 0.030391}
  101 | 0.000086 | 069888/160596 | 0.0044 | 1.0754 |
val: {'recall': 0.995593, 'recall_grapheme': 0.993741, 'recall_vowel': 0.997399, 'recall_consonant': 0.997492, 'acc_grapheme': 0.99344, 'acc_vowel': 0.99749, 'acc_consonant': 0.997565, 'loss_grapheme': 0.039439, 'loss_vowel': 0.022155, 'loss_consonant': 0.015047}
  102 | 0.000093 | 062976/160596 | 0.0081 | 0.

  124 | 0.000086 | 064512/160596 | 2.3480 | 1.0901 |
val: {'recall': 0.994852, 'recall_grapheme': 0.993261, 'recall_vowel': 0.996844, 'recall_consonant': 0.996043, 'acc_grapheme': 0.992794, 'acc_vowel': 0.99754, 'acc_consonant': 0.997366, 'loss_grapheme': 0.058023, 'loss_vowel': 0.044425, 'loss_consonant': 0.027339}
  125 | 0.000093 | 057600/160596 | 0.0134 | 1.0411 |
val: {'recall': 0.995032, 'recall_grapheme': 0.993427, 'recall_vowel': 0.996918, 'recall_consonant': 0.996359, 'acc_grapheme': 0.993192, 'acc_vowel': 0.997391, 'acc_consonant': 0.997465, 'loss_grapheme': 0.044581, 'loss_vowel': 0.025998, 'loss_consonant': 0.019002}
  126 | 0.000098 | 050688/160596 | 1.2189 | 0.8846 |
val: {'recall': 0.994566, 'recall_grapheme': 0.99286, 'recall_vowel': 0.996565, 'recall_consonant': 0.995978, 'acc_grapheme': 0.993092, 'acc_vowel': 0.997316, 'acc_consonant': 0.996894, 'loss_grapheme': 0.103283, 'loss_vowel': 0.05746, 'loss_consonant': 0.033882}
  127 | 0.000100 | 043776/160596 | 0.0178 | 1.

KeyboardInterrupt: 

In [37]:
train(args)

{'recall': 0.996594, 'recall_grapheme': 0.995644, 'recall_vowel': 0.997221, 'recall_consonant': 0.997868, 'acc_grapheme': 0.994757, 'acc_vowel': 0.997888, 'acc_consonant': 0.998037, 'loss_grapheme': 0.04701, 'loss_vowel': 0.031229, 'loss_consonant': 0.019246}
    0 | 0.000050 | 153600/160596 | 2.9915 | 0.9949 |
val: {'recall': 0.995376, 'recall_grapheme': 0.993672, 'recall_vowel': 0.996709, 'recall_consonant': 0.99745, 'acc_grapheme': 0.992545, 'acc_vowel': 0.997167, 'acc_consonant': 0.997465, 'loss_grapheme': 0.065048, 'loss_vowel': 0.045178, 'loss_consonant': 0.028609}




    1 | 0.000049 | 146688/160596 | 2.7502 | 0.9795 |
val: {'recall': 0.995199, 'recall_grapheme': 0.993832, 'recall_vowel': 0.996334, 'recall_consonant': 0.996799, 'acc_grapheme': 0.993117, 'acc_vowel': 0.997093, 'acc_consonant': 0.997217, 'loss_grapheme': 0.070144, 'loss_vowel': 0.042003, 'loss_consonant': 0.029556}
    2 | 0.000047 | 139776/160596 | 0.9920 | 1.1780 |
val: {'recall': 0.996026, 'recall_grapheme': 0.994619, 'recall_vowel': 0.997464, 'recall_consonant': 0.997399, 'acc_grapheme': 0.99431, 'acc_vowel': 0.997913, 'acc_consonant': 0.997813, 'loss_grapheme': 0.079974, 'loss_vowel': 0.048938, 'loss_consonant': 0.029277}
    3 | 0.000043 | 132864/160596 | 1.9856 | 0.9920 |
val: {'recall': 0.995362, 'recall_grapheme': 0.994091, 'recall_vowel': 0.996464, 'recall_consonant': 0.9968, 'acc_grapheme': 0.993266, 'acc_vowel': 0.99754, 'acc_consonant': 0.997689, 'loss_grapheme': 0.047206, 'loss_vowel': 0.03002, 'loss_consonant': 0.02143}
    4 | 0.000038 | 125952/160596 | 1.1945 | 1.010

   26 | 0.000043 | 127488/160596 | 0.3480 | 0.9857 |
val: {'recall': 0.996106, 'recall_grapheme': 0.995074, 'recall_vowel': 0.996828, 'recall_consonant': 0.997447, 'acc_grapheme': 0.993987, 'acc_vowel': 0.997664, 'acc_consonant': 0.997863, 'loss_grapheme': 0.04099, 'loss_vowel': 0.025943, 'loss_consonant': 0.017605}
   27 | 0.000038 | 120576/160596 | 1.9621 | 1.0447 |
val: {'recall': 0.995938, 'recall_grapheme': 0.994672, 'recall_vowel': 0.996948, 'recall_consonant': 0.99746, 'acc_grapheme': 0.994061, 'acc_vowel': 0.99754, 'acc_consonant': 0.997863, 'loss_grapheme': 0.037164, 'loss_vowel': 0.024009, 'loss_consonant': 0.017022}
   28 | 0.000032 | 113664/160596 | 2.1776 | 0.9960 |
val: {'recall': 0.996103, 'recall_grapheme': 0.994855, 'recall_vowel': 0.997058, 'recall_consonant': 0.997644, 'acc_grapheme': 0.994484, 'acc_vowel': 0.997863, 'acc_consonant': 0.997913, 'loss_grapheme': 0.040958, 'loss_vowel': 0.026224, 'loss_consonant': 0.01789}
   29 | 0.000026 | 106752/160596 | 0.0052 | 0.9

KeyboardInterrupt: 

In [49]:
train(args)

{'recall': 0.996243, 'recall_grapheme': 0.9952, 'recall_vowel': 0.9978, 'recall_consonant': 0.996773, 'acc_grapheme': 0.994682, 'acc_vowel': 0.998037, 'acc_consonant': 0.99831, 'loss_grapheme': 0.038524, 'loss_vowel': 0.023072, 'loss_consonant': 0.015039}
    0 | 0.000050 | 153600/160596 | 0.0004 | 1.0014 |
val: {'recall': 0.99648, 'recall_grapheme': 0.995899, 'recall_vowel': 0.997387, 'recall_consonant': 0.996733, 'acc_grapheme': 0.995279, 'acc_vowel': 0.997962, 'acc_consonant': 0.998087, 'loss_grapheme': 0.024084, 'loss_vowel': 0.012797, 'loss_consonant': 0.01014}
** saved
    1 | 0.000049 | 146688/160596 | 1.0808 | 0.9460 |
val: {'recall': 0.995629, 'recall_grapheme': 0.994899, 'recall_vowel': 0.997073, 'recall_consonant': 0.995644, 'acc_grapheme': 0.994111, 'acc_vowel': 0.997639, 'acc_consonant': 0.997764, 'loss_grapheme': 0.05105, 'loss_vowel': 0.033326, 'loss_consonant': 0.02077}
    2 | 0.000047 | 139776/160596 | 3.1113 | 1.1620 |
val: {'recall': 0.996102, 'recall_grapheme': 0.9

Exception ignored in: <function _releaseLock at 0x7fb3a5f35840>
Traceback (most recent call last):
  File "/home/chec/anaconda3/lib/python3.7/logging/__init__.py", line 221, in _releaseLock
    def _releaseLock():
KeyboardInterrupt


RuntimeError: DataLoader worker (pid(s) 92078, 92079, 92080, 92081) exited unexpectedly

In [39]:
del model