In [1]:
import os
import pandas as pd
import numpy as np
import time, gc
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pretrainedmodels
from argparse import Namespace
from sklearn.utils import shuffle
from apex import amp
from efficientnet_pytorch import EfficientNet


In [2]:
!ls /home/chec/data/bengali

class_map.csv		       train.csv
sample_submission.csv	       train.csv.zip
test.csv		       train_image_data_0.parquet
test_image_data_0.parquet      train_image_data_0.parquet.zip
test_image_data_0.parquet.zip  train_image_data_1.parquet
test_image_data_1.parquet      train_image_data_1.parquet.zip
test_image_data_1.parquet.zip  train_image_data_2.parquet
test_image_data_2.parquet      train_image_data_2.parquet.zip
test_image_data_2.parquet.zip  train_image_data_3.parquet
test_image_data_3.parquet      train_image_data_3.parquet.zip
test_image_data_3.parquet.zip


In [3]:
DATA_DIR = '/home/chec/data/bengali'

In [4]:
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
class_map_df = pd.read_csv(f'{DATA_DIR}/class_map.csv')
sample_sub_df = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

In [5]:
import albumentations as albu

def get_train_augs(p=1.):
    return albu.Compose([
        #albu.HorizontalFlip(.5),
        albu.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=0.5 ),
        albu.Blur(blur_limit=3, p=0.3),
        albu.OpticalDistortion(p=0.3),
        albu.GaussNoise(p=0.3)
        #albu.GridDistortion(p=.33),
        #albu.HueSaturationValue(p=.33) # not for grey scale
    ], p=p)

In [6]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

HEIGHT = 137
WIDTH = 236

class BengaliDataset(Dataset):
    def __init__(self, df, img_df, train_mode=True, test_mode=False):
        self.df = df
        self.img_df = img_df
        self.train_mode = train_mode
        self.test_mode = test_mode

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self.get_img(row.image_id)
        #print(img.shape)
        if self.train_mode:
            augs = get_train_augs()
            img = augs(image=img)['image']
        
        img = np.expand_dims(img, axis=-1)
        
        img = transforms.functional.to_tensor(img)
        #img = transforms.functional.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        
        if self.test_mode:
            return img
        else:
            return img, torch.tensor([row.grapheme_root, row.vowel_diacritic, row.consonant_diacritic])

    def get_img(self, img_id):
        return 255 - self.img_df.loc[img_id].values.reshape(HEIGHT, WIDTH).astype(np.uint8)

    def __len__(self):
        return len(self.df)
    
def get_train_val_loaders(batch_size=4, val_batch_size=4, dev_mode=False):
    train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
    train_df = shuffle(train_df, random_state=1234)
    print(train_df.shape)

    if dev_mode:
        img_df = pd.read_parquet(f'{DATA_DIR}/train_image_data_0.parquet').set_index('image_id')
        train_df = train_df.iloc[:1000]
    else:
        img_dfs = [pd.read_parquet(f'{DATA_DIR}/train_image_data_{i}.parquet') for i in range(4)]
        img_df = pd.concat(img_dfs, axis=0).set_index('image_id')
    print(img_df.shape)
    split_index = int(len(train_df) * 0.9)
    
    train = train_df.iloc[:split_index]
    val = train_df.iloc[split_index:]
    print(train.shape, val.shape)
    
    train_ds = BengaliDataset(train, img_df, True, False)
    val_ds = BengaliDataset(val, img_df, False, False)
    
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=8, drop_last=True)
    train_loader.num = len(train_ds)

    val_loader = DataLoader(val_ds, batch_size=val_batch_size, shuffle=False, num_workers=8, drop_last=False)
    val_loader.num = len(val_ds)

    return train_loader, val_loader

In [7]:
#train_loader, val_loader = get_train_val_loaders(dev_mode=True)

# model

In [8]:
#import pretrainedmodels

In [9]:
print(pretrainedmodels.model_names)

['fbresnet152', 'bninception', 'resnext101_32x4d', 'resnext101_64x4d', 'inceptionv4', 'inceptionresnetv2', 'alexnet', 'densenet121', 'densenet169', 'densenet201', 'densenet161', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'inceptionv3', 'squeezenet1_0', 'squeezenet1_1', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19_bn', 'vgg19', 'nasnetamobile', 'nasnetalarge', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn131', 'dpn107', 'xception', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', 'se_resnext101_32x4d', 'cafferesnet101', 'pnasnet5large', 'polynet']


In [10]:
#model_name = 'resnet50' # could be fbresnet152 or inceptionresnetv2
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet').cuda()
#model.eval()

In [11]:
#model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained=False).cuda()


In [12]:
#model.features(torch.randn((2, 3, 137, 236)).cuda()).size()

In [13]:
#model.last_linear.in_features

In [14]:
class BengaliNet(nn.Module):
    def __init__(self, backbone_name):
        super(BengaliNet, self).__init__()
        self.n_grapheme = 168
        self.n_vowel = 11
        self.n_consonant = 7
        self.backbone_name = backbone_name
        
        self.num_classes = self.n_grapheme + self.n_vowel + self.n_consonant
        
        self.conv0 = nn.Conv2d(1, 3, kernel_size=1, stride=1, padding=0)
        
        if self.backbone_name.startswith('efficient'):
            self.backbone = EfficientNet.from_pretrained(self.backbone_name)
            self.fc = nn.Linear(self.backbone._fc.in_features, self.num_classes)
        else:
            self.backbone = pretrainedmodels.__dict__[self.backbone_name](num_classes=1000, pretrained='imagenet')
            self.fc = nn.Linear(self.backbone.last_linear.in_features, self.num_classes)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        
    def logits(self, x):
        x = self.avg_pool(x)
        x = F.dropout2d(x, 0.2, self.training)
        x = x.view(x.size(0), -1)
        return self.fc(x)
    
    def forward(self, x):
        x = self.conv0(x)
        #print(x.size())
        if self.backbone_name.startswith('efficient'):
            x = self.backbone.extract_features(x)
        else:
            x = self.backbone.features(x)
        x = self.logits(x)

        return x

In [15]:
MODEL_DIR = './models'
def create_model(args):
    model = BengaliNet(backbone_name=args.backbone)
    model_file = os.path.join(MODEL_DIR, args.backbone, args.ckp_name)

    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    print('model file: {}, exist: {}'.format(model_file, os.path.exists(model_file)))

    if args.predict and (not os.path.exists(model_file)):
        raise AttributeError('model file does not exist: {}'.format(model_file))

    if os.path.exists(model_file):
        print('loading {}...'.format(model_file))
        model.load_state_dict(torch.load(model_file))
    
    return model, model_file

In [16]:
'''
args = Namespace()
args.backbone = 'se_resnext50_32x4d'
args.ckp_name = 'best_model.pth'
args.predict = False

bnet = create_model(args)[0].cuda()
'''

"\nargs = Namespace()\nargs.backbone = 'se_resnext50_32x4d'\nargs.ckp_name = 'best_model.pth'\nargs.predict = False\n\nbnet = create_model(args)[0].cuda()\n"

In [17]:
#bnet = BengaliNet('se_resnext50_32x4d').cuda()

In [18]:
#bnet(torch.randn((2, 1, 137, 236)).cuda()).size()

# train

In [19]:
round(1/9, 6)

0.111111

In [20]:
import numpy as np
import sklearn.metrics
import torch


def macro_recall(pred_y, y, n_grapheme=168, n_vowel=11, n_consonant=7):
    pred_y = torch.split(pred_y, [n_grapheme, n_vowel, n_consonant], dim=1)
    pred_labels = [torch.argmax(py, dim=1).cpu().numpy() for py in pred_y]

    #y = y.cpu().numpy()
    # pred_y = [p.cpu().numpy() for p in pred_y]

    recall_grapheme = sklearn.metrics.recall_score(pred_labels[0], y_grapheme, average='macro')
    recall_vowel = sklearn.metrics.recall_score(pred_labels[1], y_vowel, average='macro')
    recall_consonant = sklearn.metrics.recall_score(pred_labels[2], y_consonant, average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_score = np.average(scores, weights=[2, 1, 1])
    # print(f'recall: grapheme {recall_grapheme}, vowel {recall_vowel}, consonant {recall_consonant}, '
    #       f'total {final_score}, y {y.shape}')
    return final_score

def calc_metrics(preds0, preds1, preds2, y):
    assert len(y) == len(preds0) == len(preds1) == len(preds2)

    recall_grapheme = sklearn.metrics.recall_score(preds0, y[:, 0], average='macro')
    recall_vowel = sklearn.metrics.recall_score(preds1, y[:, 1], average='macro')
    recall_consonant = sklearn.metrics.recall_score(preds2, y[:, 2], average='macro')
    scores = [recall_grapheme, recall_vowel, recall_consonant]
    final_recall_score = np.average(scores, weights=[2, 1, 1])
    
    metrics = {}
    metrics['recall'] = round(final_recall_score, 6)
    metrics['recall_grapheme'] = round(recall_grapheme, 6)
    metrics['recall_vowel'] = round(recall_vowel, 6)
    metrics['recall_consonant'] = round(recall_consonant, 6)
    
    metrics['acc_grapheme'] = round((preds0 == y[:, 0]).sum() / len(y), 6)
    metrics['acc_vowel'] = round((preds1 == y[:, 1]).sum() / len(y), 6)
    metrics['acc_consonant'] = round((preds2 == y[:, 2]).sum() / len(y), 6)
    
    
    return metrics

In [21]:
def accuracy(output, label, topk=(1,)):
    maxk = max(topk)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(label.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).sum().item()
        res.append(correct_k)
    return res

In [22]:
def criterion(outputs, y_true):
    # outputs: (N, 182)
    # y_true: (N, 3)
    
    outputs = torch.split(outputs, [168, 11, 7], dim=1)
    loss0 = F.cross_entropy(outputs[0], y_true[:, 0], reduction='mean')
    loss1 = F.cross_entropy(outputs[1], y_true[:, 1], reduction='mean')
    loss2 = F.cross_entropy(outputs[2], y_true[:, 2], reduction='mean')
    
    return loss0 + loss1 + loss2 #, loss0.item(), loss1.item(), loss2.item()

In [23]:
def validate(model, val_loader):
    model.eval()
    loss0, loss1, loss2 = 0., 0., 0.
    preds0, preds1,preds2 = [], [], []
    y_true = []
    with torch.no_grad():
        for x, y in val_loader:
            y_true.append(y)
            x, y = x.cuda(), y.cuda()
            outputs = model(x)
            outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
            preds0.append(torch.max(outputs[0], dim=1)[1])
            preds1.append(torch.max(outputs[1], dim=1)[1])
            preds2.append(torch.max(outputs[2], dim=1)[1])
            loss0 += F.cross_entropy(outputs[0], y[:, 0], reduction='sum').item()
            loss1 += F.cross_entropy(outputs[1], y[:, 1], reduction='sum').item()
            loss2 += F.cross_entropy(outputs[2], y[:, 2], reduction='sum').item()
            
            # for debug
            #metrics = {}
            #metrics['loss_grapheme'] =  F.cross_entropy(outputs[0], y[:, 0], reduction='mean').item()
            #metrics['loss_vowel'] =  F.cross_entropy(outputs[1], y[:, 1], reduction='mean').item()
            #metrics['loss_consonant'] =  F.cross_entropy(outputs[2], y[:, 2], reduction='mean').item()
            #return metrics
    
    preds0 = torch.cat(preds0, 0).cpu().numpy()
    preds1 = torch.cat(preds1, 0).cpu().numpy()
    preds2 = torch.cat(preds2, 0).cpu().numpy()
    y_true = torch.cat(y_true, 0).numpy()
    
    #print('y_true:', y_true.shape)
    #print('preds0:', preds0.shape)
    
    metrics = calc_metrics(preds0, preds1, preds2, y_true)
    metrics['loss_grapheme'] = round(loss0 / val_loader.num, 6)
    metrics['loss_vowel'] = round(loss1 / val_loader.num, 6)
    metrics['loss_consonant'] = round(loss2 / val_loader.num, 6)
    
    return metrics
            

In [24]:
def get_lrs(optimizer):
    lrs = []
    for pgs in optimizer.state_dict()['param_groups']:
        lrs.append(pgs['lr'])
    lrs = ['{:.6f}'.format(x) for x in lrs]
    return lrs

In [25]:
def save_model(model, model_file):
    parent_dir = os.path.dirname(model_file)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    if isinstance(model, nn.DataParallel):
        torch.save(model.module.state_dict(), model_file)
    else:
        torch.save(model.state_dict(), model_file)

In [26]:
def mixup(data, targets, alpha=1):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)
    data = data * lam + shuffled_data * (1 - lam)
    targets = (targets, shuffled_targets, lam)

    return data, targets


def mixup_criterion(outputs, targets):
    targets1, targets2, lam = targets
    #criterion = nn.CrossEntropyLoss(reduction='mean')
    return lam * criterion(outputs, targets1) + (1 - lam) * criterion(outputs, targets2)

In [27]:
def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = np.int(W * cut_rat)
    cut_h = np.int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2

In [28]:
def train(args):
    global model

    if args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0001)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=0.0001)

    if args.lrs == 'plateau':
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=args.factor, patience=args.patience, min_lr=args.min_lr)
    else:
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.t_max, eta_min=args.min_lr)
        
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",verbosity=0)
    
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    best_metrics = 0.
    best_key = 'recall'
    
    val_metrics = validate(model, val_loader)
    print(val_metrics)
    best_metrics = val_metrics[best_key]
    
    model.train()
    #optimizer.zero_grad()

    #if args.lrs == 'plateau':
    #    lr_scheduler.step(best_metrics)
    #else:
    #    lr_scheduler.step()
    train_iter = 0

    for epoch in range(args.num_epochs):
        train_loss = 0

        current_lr = get_lrs(optimizer)
        bg = time.time()
        for batch_idx, (img, targets) in enumerate(train_loader):
            train_iter += 1
            img, targets  = img.cuda(), targets.cuda()
            #do_mixup = False #(np.random.random() < 0.4)
            
            #if do_mixup:
            #    img, targets = mixup(img, targets)
            batch_size = img.size(0)
          
            
            
            #if do_mixup:
            #    loss = mixup_criterion(outputs, targets)
            #else:
            #    loss = criterion(outputs, targets)
            r = np.random.rand(1)
            if args.beta > 0 and r < args.cutmix_prob:
                # generate mixed sample
                lam = np.random.beta(args.beta, args.beta)
                rand_index = torch.randperm(img.size()[0]).cuda()
                target_a = targets
                target_b = targets[rand_index]
                bbx1, bby1, bbx2, bby2 = rand_bbox(img.size(), lam)
                img[:, :, bbx1:bbx2, bby1:bby2] = img[rand_index, :, bbx1:bbx2, bby1:bby2]
                # adjust lambda to exactly match pixel ratio
                lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (img.size()[-1] * img.size()[-2]))
                # compute output
                outputs = model(img)
                loss = criterion(outputs, target_a) * lam + criterion(outputs, target_b) * (1. - lam)
            else:
                outputs = model(img)
                loss = criterion(outputs, targets)
            
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            
            #loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            
            #if batch_idx % 4 == 0:
            #    optimizer.step()
            #    optimizer.zero_grad()

            train_loss += loss.item()
            print('\r {:4d} | {:.6f} | {:06d}/{} | {:.4f} | {:.4f} |'.format(
                epoch, float(current_lr[0]), batch_size*(batch_idx+1), train_loader.num, 
                loss.item(), train_loss/(batch_idx+1)), end='')

            if train_iter > 0 and train_iter % args.iter_val == 0:
                #outputs = torch.split(outputs, [168, 11, 7], dim=1)
            
                #preds0 = (torch.max(outputs[0], dim=1)[1]).cpu().numpy()
                #preds1 = (torch.max(outputs[1], dim=1)[1]).cpu().numpy()
                #preds2 = (torch.max(outputs[2], dim=1)[1]).cpu().numpy()
                #train_metrics = calc_metrics(preds0, preds1, preds2, targets.cpu().numpy())
                #print('train:', train_metrics)
                #save_model(model, model_file+'_latest')
                val_metrics = validate(model, val_loader)
                print('\nval:', val_metrics)
                
                if val_metrics[best_key] > best_metrics:
                    best_metrics = val_metrics[best_key]
                    save_model(model, model_file)
                    print('** saved')
                
                model.train()
                
                if args.lrs == 'plateau':
                    lr_scheduler.step(best_metrics)
                else:
                    lr_scheduler.step()
                current_lr = get_lrs(optimizer)
        
    

In [34]:
args = Namespace()
args.backbone = 'inceptionresnetv2'
args.ckp_name = 'best_model.pth'
args.predict = False
args.optim = 'SGD'
args.lr = 1e-4
args.lrs = 'cosine'
args.t_max = 12
args.factor = 0.6
args.patience = 3
args.min_lr = 1e-5
args.iter_val = 200
args.num_epochs = 100000
args.batch_size = 1024
args.val_batch_size = 2048

args.beta = 1.0
args.cutmix_prob = 0.5

In [30]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size)

(200840, 5)
(200840, 32332)
(180756, 5) (20084, 5)


In [35]:
model, model_file = create_model(args)
#if torch.cuda.device_count() > 1:
#    model = nn.DataParallel(model)
model = model.cuda()


model file: ./models/inceptionresnetv2/best_model.pth, exist: True
loading ./models/inceptionresnetv2/best_model.pth...


In [None]:
train(args) #inceptionresnetv2

{'recall': 0.976341, 'recall_grapheme': 0.964064, 'recall_vowel': 0.987263, 'recall_consonant': 0.989972, 'acc_grapheme': 0.965545, 'acc_vowel': 0.990042, 'acc_consonant': 0.990341, 'loss_grapheme': 0.186893, 'loss_vowel': 0.082564, 'loss_consonant': 0.065909}
    1 | 0.000100 | 024576/180756 | 0.0085 | 1.1132 |
val: {'recall': 0.975258, 'recall_grapheme': 0.963895, 'recall_vowel': 0.987486, 'recall_consonant': 0.985757, 'acc_grapheme': 0.965346, 'acc_vowel': 0.990141, 'acc_consonant': 0.990092, 'loss_grapheme': 0.152215, 'loss_vowel': 0.049587, 'loss_consonant': 0.0435}
    2 | 0.000098 | 049152/180756 | 2.4519 | 2.2276 |
val: {'recall': 0.976364, 'recall_grapheme': 0.964881, 'recall_vowel': 0.987465, 'recall_consonant': 0.988231, 'acc_grapheme': 0.965893, 'acc_vowel': 0.990092, 'acc_consonant': 0.990341, 'loss_grapheme': 0.178467, 'loss_vowel': 0.080176, 'loss_consonant': 0.063563}
** saved
    3 | 0.000094 | 073728/180756 | 0.0117 | 1.8251 |
val: {'recall': 0.976059, 'recall_graphem

** saved
   29 | 0.000098 | 098304/180756 | 0.0068 | 1.9750 |
val: {'recall': 0.976667, 'recall_grapheme': 0.965217, 'recall_vowel': 0.986929, 'recall_consonant': 0.989306, 'acc_grapheme': 0.966242, 'acc_vowel': 0.990042, 'acc_consonant': 0.990191, 'loss_grapheme': 0.161852, 'loss_vowel': 0.066753, 'loss_consonant': 0.053954}
   30 | 0.000094 | 122880/180756 | 1.8115 | 1.6612 |
val: {'recall': 0.976375, 'recall_grapheme': 0.964423, 'recall_vowel': 0.987598, 'recall_consonant': 0.989056, 'acc_grapheme': 0.965893, 'acc_vowel': 0.99044, 'acc_consonant': 0.990141, 'loss_grapheme': 0.166233, 'loss_vowel': 0.067427, 'loss_consonant': 0.056925}
   31 | 0.000087 | 147456/180756 | 4.1691 | 1.3480 |
val: {'recall': 0.975713, 'recall_grapheme': 0.964196, 'recall_vowel': 0.987234, 'recall_consonant': 0.987227, 'acc_grapheme': 0.966391, 'acc_vowel': 0.990141, 'acc_consonant': 0.990042, 'loss_grapheme': 0.161361, 'loss_vowel': 0.064096, 'loss_consonant': 0.053293}
   32 | 0.000078 | 172032/180756 | 

   59 | 0.000087 | 016384/180756 | 4.0898 | 2.1518 |
val: {'recall': 0.977239, 'recall_grapheme': 0.966465, 'recall_vowel': 0.987357, 'recall_consonant': 0.988669, 'acc_grapheme': 0.966939, 'acc_vowel': 0.990191, 'acc_consonant': 0.99049, 'loss_grapheme': 0.208402, 'loss_vowel': 0.109987, 'loss_consonant': 0.081441}
** saved
   60 | 0.000077 | 040960/180756 | 3.0870 | 1.3461 |
val: {'recall': 0.975346, 'recall_grapheme': 0.96423, 'recall_vowel': 0.987313, 'recall_consonant': 0.98561, 'acc_grapheme': 0.965943, 'acc_vowel': 0.990191, 'acc_consonant': 0.990042, 'loss_grapheme': 0.155403, 'loss_vowel': 0.05597, 'loss_consonant': 0.047815}
   61 | 0.000067 | 065536/180756 | 0.0117 | 2.2741 |
val: {'recall': 0.976287, 'recall_grapheme': 0.964388, 'recall_vowel': 0.987538, 'recall_consonant': 0.988831, 'acc_grapheme': 0.966341, 'acc_vowel': 0.99039, 'acc_consonant': 0.990092, 'loss_grapheme': 0.168969, 'loss_vowel': 0.071869, 'loss_consonant': 0.05789}
   62 | 0.000055 | 090112/180756 | 0.011

   88 | 0.000067 | 114688/180756 | 3.7324 | 1.5624 |
val: {'recall': 0.976673, 'recall_grapheme': 0.965096, 'recall_vowel': 0.988147, 'recall_consonant': 0.988351, 'acc_grapheme': 0.966491, 'acc_vowel': 0.99039, 'acc_consonant': 0.99059, 'loss_grapheme': 0.184828, 'loss_vowel': 0.08589, 'loss_consonant': 0.067056}
   89 | 0.000055 | 139264/180756 | 0.0281 | 1.6649 |
val: {'recall': 0.975047, 'recall_grapheme': 0.963727, 'recall_vowel': 0.987459, 'recall_consonant': 0.985274, 'acc_grapheme': 0.965644, 'acc_vowel': 0.990092, 'acc_consonant': 0.989992, 'loss_grapheme': 0.157127, 'loss_vowel': 0.056618, 'loss_consonant': 0.047855}
   90 | 0.000043 | 163840/180756 | 0.0075 | 1.7707 |
val: {'recall': 0.975891, 'recall_grapheme': 0.964574, 'recall_vowel': 0.987614, 'recall_consonant': 0.986804, 'acc_grapheme': 0.966391, 'acc_vowel': 0.99044, 'acc_consonant': 0.990042, 'loss_grapheme': 0.153706, 'loss_vowel': 0.0561, 'loss_consonant': 0.047958}
   92 | 0.000032 | 008192/180756 | 0.0077 | 1.721

In [33]:
del model

In [42]:
train(args) #efficient b1

{'recall': 0.07458, 'recall_grapheme': 0.001257, 'recall_vowel': 0.13502, 'recall_consonant': 0.160785, 'acc_grapheme': 0.003635, 'acc_vowel': 0.149173, 'acc_consonant': 0.18084, 'loss_grapheme': 5.140628, 'loss_vowel': 2.363855, 'loss_consonant': 1.917337}
    1 | 0.000040 | 024576/180756 | 5.9492 | 6.4747 |
val: {'recall': 0.278253, 'recall_grapheme': 0.013213, 'recall_vowel': 0.477369, 'recall_consonant': 0.609216, 'acc_grapheme': 0.072296, 'acc_vowel': 0.746664, 'acc_consonant': 0.778978, 'loss_grapheme': 4.377518, 'loss_vowel': 0.917273, 'loss_consonant': 0.688966}
** saved




    2 | 0.000039 | 049152/180756 | 4.4458 | 5.4273 |
val: {'recall': 0.437907, 'recall_grapheme': 0.144446, 'recall_vowel': 0.823923, 'recall_consonant': 0.638812, 'acc_grapheme': 0.24059, 'acc_vowel': 0.838229, 'acc_consonant': 0.895738, 'loss_grapheme': 3.429989, 'loss_vowel': 0.630349, 'loss_consonant': 0.404714}
** saved
    3 | 0.000037 | 073728/180756 | 3.2732 | 4.7400 |
val: {'recall': 0.58344, 'recall_grapheme': 0.327591, 'recall_vowel': 0.888892, 'recall_consonant': 0.789686, 'acc_grapheme': 0.45942, 'acc_vowel': 0.898476, 'acc_consonant': 0.924716, 'loss_grapheme': 2.420991, 'loss_vowel': 0.459054, 'loss_consonant': 0.309041}
** saved
    4 | 0.000034 | 098304/180756 | 2.5346 | 4.3281 |
val: {'recall': 0.754576, 'recall_grapheme': 0.567345, 'recall_vowel': 0.932444, 'recall_consonant': 0.951171, 'acc_grapheme': 0.632693, 'acc_vowel': 0.932135, 'acc_consonant': 0.940948, 'loss_grapheme': 1.711213, 'loss_vowel': 0.372935, 'loss_consonant': 0.269201}
** saved
    5 | 0.000030 | 

** saved
   60 | 0.000030 | 040960/180756 | 0.2915 | 1.8240 |
val: {'recall': 0.963969, 'recall_grapheme': 0.942094, 'recall_vowel': 0.981729, 'recall_consonant': 0.98996, 'acc_grapheme': 0.940799, 'acc_vowel': 0.983918, 'acc_consonant': 0.984714, 'loss_grapheme': 0.243609, 'loss_vowel': 0.111367, 'loss_consonant': 0.086747}
** saved
   61 | 0.000026 | 065536/180756 | 2.9280 | 1.9589 |
val: {'recall': 0.96327, 'recall_grapheme': 0.940485, 'recall_vowel': 0.982443, 'recall_consonant': 0.989667, 'acc_grapheme': 0.940151, 'acc_vowel': 0.983967, 'acc_consonant': 0.983718, 'loss_grapheme': 0.255047, 'loss_vowel': 0.117094, 'loss_consonant': 0.089394}
   62 | 0.000021 | 090112/180756 | 0.3134 | 1.9919 |
val: {'recall': 0.963296, 'recall_grapheme': 0.940203, 'recall_vowel': 0.982431, 'recall_consonant': 0.990349, 'acc_grapheme': 0.940749, 'acc_vowel': 0.983868, 'acc_consonant': 0.983967, 'loss_grapheme': 0.24437, 'loss_vowel': 0.105124, 'loss_consonant': 0.082845}
   63 | 0.000015 | 114688/18

   89 | 0.000021 | 139264/180756 | 0.2647 | 2.0814 |
val: {'recall': 0.965821, 'recall_grapheme': 0.945204, 'recall_vowel': 0.984261, 'recall_consonant': 0.988614, 'acc_grapheme': 0.94762, 'acc_vowel': 0.985411, 'acc_consonant': 0.986606, 'loss_grapheme': 0.207762, 'loss_vowel': 0.09633, 'loss_consonant': 0.074066}
   90 | 0.000015 | 163840/180756 | 0.1954 | 2.2083 |
val: {'recall': 0.966486, 'recall_grapheme': 0.946695, 'recall_vowel': 0.983565, 'recall_consonant': 0.988989, 'acc_grapheme': 0.948068, 'acc_vowel': 0.98561, 'acc_consonant': 0.986606, 'loss_grapheme': 0.217433, 'loss_vowel': 0.099862, 'loss_consonant': 0.07997}
   92 | 0.000011 | 008192/180756 | 3.9731 | 2.0994 |
val: {'recall': 0.967025, 'recall_grapheme': 0.946581, 'recall_vowel': 0.98362, 'recall_consonant': 0.991317, 'acc_grapheme': 0.948566, 'acc_vowel': 0.98576, 'acc_consonant': 0.986805, 'loss_grapheme': 0.21569, 'loss_vowel': 0.100773, 'loss_consonant': 0.079335}
** saved
   93 | 0.000007 | 032768/180756 | 5.7383

  119 | 0.000011 | 057344/180756 | 5.5293 | 1.8946 |
val: {'recall': 0.968599, 'recall_grapheme': 0.948878, 'recall_vowel': 0.984752, 'recall_consonant': 0.991886, 'acc_grapheme': 0.95245, 'acc_vowel': 0.986656, 'acc_consonant': 0.987353, 'loss_grapheme': 0.192636, 'loss_vowel': 0.088519, 'loss_consonant': 0.071187}
  120 | 0.000007 | 081920/180756 | 0.1956 | 1.9332 |
val: {'recall': 0.968784, 'recall_grapheme': 0.949254, 'recall_vowel': 0.984958, 'recall_consonant': 0.99167, 'acc_grapheme': 0.953296, 'acc_vowel': 0.986855, 'acc_consonant': 0.987552, 'loss_grapheme': 0.186309, 'loss_vowel': 0.080656, 'loss_consonant': 0.064414}
  121 | 0.000004 | 106496/180756 | 4.4213 | 2.5060 |
val: {'recall': 0.968981, 'recall_grapheme': 0.949402, 'recall_vowel': 0.985234, 'recall_consonant': 0.991885, 'acc_grapheme': 0.952002, 'acc_vowel': 0.986905, 'acc_consonant': 0.986905, 'loss_grapheme': 0.209559, 'loss_vowel': 0.103785, 'loss_consonant': 0.082104}
  122 | 0.000002 | 131072/180756 | 0.1577 | 1

  148 | 0.000004 | 155648/180756 | 0.1456 | 1.8366 |
val: {'recall': 0.970482, 'recall_grapheme': 0.953163, 'recall_vowel': 0.985494, 'recall_consonant': 0.990109, 'acc_grapheme': 0.956433, 'acc_vowel': 0.987453, 'acc_consonant': 0.988897, 'loss_grapheme': 0.172401, 'loss_vowel': 0.074538, 'loss_consonant': 0.061104}
** saved
  149 | 0.000002 | 180224/180756 | 0.1117 | 2.0051 |
val: {'recall': 0.970056, 'recall_grapheme': 0.952477, 'recall_vowel': 0.985124, 'recall_consonant': 0.990146, 'acc_grapheme': 0.955636, 'acc_vowel': 0.987204, 'acc_consonant': 0.988847, 'loss_grapheme': 0.178338, 'loss_vowel': 0.08025, 'loss_consonant': 0.065393}
  151 | 0.000001 | 024576/180756 | 0.1515 | 2.0407 |
val: {'recall': 0.970077, 'recall_grapheme': 0.952493, 'recall_vowel': 0.985174, 'recall_consonant': 0.99015, 'acc_grapheme': 0.955487, 'acc_vowel': 0.987303, 'acc_consonant': 0.988648, 'loss_grapheme': 0.184359, 'loss_vowel': 0.085525, 'loss_consonant': 0.068861}
  152 | 0.000002 | 049152/180756 | 0

KeyboardInterrupt: 

In [40]:
del model

In [22]:
train(args)  # efficientnet-b3 , cv9738 batch_size 1024

{'recall': 0.965397, 'recall_grapheme': 0.944814, 'recall_vowel': 0.982054, 'recall_consonant': 0.989908, 'acc_grapheme': 0.943985, 'acc_vowel': 0.985113, 'acc_consonant': 0.98561, 'loss_grapheme': 0.253814, 'loss_vowel': 0.118889, 'loss_consonant': 0.094402}
    1 | 0.000020 | 024576/180756 | 0.2663 | 2.4646 |
val: {'recall': 0.964915, 'recall_grapheme': 0.944224, 'recall_vowel': 0.981453, 'recall_consonant': 0.989758, 'acc_grapheme': 0.944035, 'acc_vowel': 0.984864, 'acc_consonant': 0.985561, 'loss_grapheme': 0.260207, 'loss_vowel': 0.126617, 'loss_consonant': 0.103775}




    2 | 0.000020 | 049152/180756 | 0.2677 | 2.3186 |
val: {'recall': 0.96517, 'recall_grapheme': 0.946017, 'recall_vowel': 0.981363, 'recall_consonant': 0.987284, 'acc_grapheme': 0.94533, 'acc_vowel': 0.985411, 'acc_consonant': 0.985959, 'loss_grapheme': 0.243896, 'loss_vowel': 0.113981, 'loss_consonant': 0.089509}
    3 | 0.000019 | 073728/180756 | 0.3286 | 2.6794 |
val: {'recall': 0.964866, 'recall_grapheme': 0.944576, 'recall_vowel': 0.982986, 'recall_consonant': 0.987326, 'acc_grapheme': 0.944633, 'acc_vowel': 0.985411, 'acc_consonant': 0.98576, 'loss_grapheme': 0.262579, 'loss_vowel': 0.126556, 'loss_consonant': 0.095788}
    4 | 0.000017 | 098304/180756 | 0.2975 | 2.4868 |
val: {'recall': 0.96526, 'recall_grapheme': 0.944161, 'recall_vowel': 0.98217, 'recall_consonant': 0.990548, 'acc_grapheme': 0.945429, 'acc_vowel': 0.98561, 'acc_consonant': 0.985909, 'loss_grapheme': 0.244029, 'loss_vowel': 0.111893, 'loss_consonant': 0.087076}
    5 | 0.000015 | 122880/180756 | 5.5230 | 2.411

   31 | 0.000017 | 147456/180756 | 1.0917 | 2.2785 |
val: {'recall': 0.96967, 'recall_grapheme': 0.951849, 'recall_vowel': 0.982762, 'recall_consonant': 0.992223, 'acc_grapheme': 0.9524, 'acc_vowel': 0.986507, 'acc_consonant': 0.987552, 'loss_grapheme': 0.215217, 'loss_vowel': 0.102191, 'loss_consonant': 0.085412}
** saved
   32 | 0.000015 | 172032/180756 | 2.0361 | 2.2145 |
val: {'recall': 0.968838, 'recall_grapheme': 0.950552, 'recall_vowel': 0.983134, 'recall_consonant': 0.991114, 'acc_grapheme': 0.951952, 'acc_vowel': 0.986706, 'acc_consonant': 0.987652, 'loss_grapheme': 0.206998, 'loss_vowel': 0.092428, 'loss_consonant': 0.075055}
   34 | 0.000013 | 016384/180756 | 2.7617 | 2.3104 |
val: {'recall': 0.96944, 'recall_grapheme': 0.951163, 'recall_vowel': 0.982992, 'recall_consonant': 0.992442, 'acc_grapheme': 0.952599, 'acc_vowel': 0.986606, 'acc_consonant': 0.987602, 'loss_grapheme': 0.204472, 'loss_vowel': 0.094666, 'loss_consonant': 0.076104}
   35 | 0.000011 | 040960/180756 | 3.8

** saved
   61 | 0.000013 | 065536/180756 | 0.1855 | 2.2404 |
val: {'recall': 0.970651, 'recall_grapheme': 0.953273, 'recall_vowel': 0.983391, 'recall_consonant': 0.992667, 'acc_grapheme': 0.954342, 'acc_vowel': 0.987254, 'acc_consonant': 0.9881, 'loss_grapheme': 0.1906, 'loss_vowel': 0.092087, 'loss_consonant': 0.073611}
   62 | 0.000011 | 090112/180756 | 0.1858 | 2.3109 |
val: {'recall': 0.970687, 'recall_grapheme': 0.952635, 'recall_vowel': 0.98425, 'recall_consonant': 0.993229, 'acc_grapheme': 0.954242, 'acc_vowel': 0.987453, 'acc_consonant': 0.988249, 'loss_grapheme': 0.200222, 'loss_vowel': 0.097787, 'loss_consonant': 0.07707}
   63 | 0.000008 | 114688/180756 | 0.1439 | 2.3343 |
val: {'recall': 0.970776, 'recall_grapheme': 0.953202, 'recall_vowel': 0.984024, 'recall_consonant': 0.992675, 'acc_grapheme': 0.95479, 'acc_vowel': 0.987453, 'acc_consonant': 0.987951, 'loss_grapheme': 0.199982, 'loss_vowel': 0.100248, 'loss_consonant': 0.080494}
   64 | 0.000006 | 139264/180756 | 5.0327

   90 | 0.000008 | 163840/180756 | 4.8655 | 2.0834 |
val: {'recall': 0.971744, 'recall_grapheme': 0.95498, 'recall_vowel': 0.984696, 'recall_consonant': 0.99232, 'acc_grapheme': 0.956881, 'acc_vowel': 0.987702, 'acc_consonant': 0.988299, 'loss_grapheme': 0.192793, 'loss_vowel': 0.094162, 'loss_consonant': 0.074712}
   92 | 0.000006 | 008192/180756 | 4.5999 | 3.7436 |
val: {'recall': 0.971564, 'recall_grapheme': 0.954644, 'recall_vowel': 0.98416, 'recall_consonant': 0.992808, 'acc_grapheme': 0.956931, 'acc_vowel': 0.987851, 'acc_consonant': 0.988299, 'loss_grapheme': 0.187393, 'loss_vowel': 0.090875, 'loss_consonant': 0.071252}
   93 | 0.000004 | 032768/180756 | 0.1494 | 1.7893 |
val: {'recall': 0.971466, 'recall_grapheme': 0.954414, 'recall_vowel': 0.984317, 'recall_consonant': 0.992721, 'acc_grapheme': 0.956682, 'acc_vowel': 0.987951, 'acc_consonant': 0.988399, 'loss_grapheme': 0.183207, 'loss_vowel': 0.088073, 'loss_consonant': 0.068737}
   94 | 0.000002 | 057344/180756 | 5.1835 | 2.

  120 | 0.000004 | 081920/180756 | 3.3982 | 1.8886 |
val: {'recall': 0.972507, 'recall_grapheme': 0.95696, 'recall_vowel': 0.98508, 'recall_consonant': 0.991025, 'acc_grapheme': 0.959221, 'acc_vowel': 0.988697, 'acc_consonant': 0.989444, 'loss_grapheme': 0.179282, 'loss_vowel': 0.086574, 'loss_consonant': 0.067219}
  121 | 0.000002 | 106496/180756 | 0.1157 | 1.8320 |
val: {'recall': 0.972416, 'recall_grapheme': 0.956902, 'recall_vowel': 0.985036, 'recall_consonant': 0.990824, 'acc_grapheme': 0.959221, 'acc_vowel': 0.988847, 'acc_consonant': 0.989295, 'loss_grapheme': 0.166118, 'loss_vowel': 0.073162, 'loss_consonant': 0.059085}
  122 | 0.000001 | 131072/180756 | 0.1019 | 2.1207 |
val: {'recall': 0.972484, 'recall_grapheme': 0.956981, 'recall_vowel': 0.984997, 'recall_consonant': 0.990977, 'acc_grapheme': 0.959271, 'acc_vowel': 0.988598, 'acc_consonant': 0.989444, 'loss_grapheme': 0.171854, 'loss_vowel': 0.079668, 'loss_consonant': 0.063482}
  123 | 0.000001 | 155648/180756 | 0.4524 | 1

KeyboardInterrupt: 

In [70]:
#save_model(model, model_file)

In [24]:
del model

NameError: name 'model' is not defined

In [73]:
!ls ./models/se_resnext50_32x4d

best_model.pth


In [24]:
args = Namespace()
args.backbone = 'densenet201'
args.ckp_name = 'best_model.pth'
args.predict = False
args.optim = 'Adam'
args.lr = 2e-5
args.lrs = 'cosine'
args.t_max = 12
args.factor = 0.6
args.patience = 3
args.min_lr = 1e-6
args.iter_val = 200
args.num_epochs = 100000
args.batch_size = 768
args.val_batch_size = 1024

args.beta = 1.0
args.cutmix_prob = 0.9

In [20]:
train_loader, val_loader = get_train_val_loaders(batch_size=args.batch_size, val_batch_size=args.val_batch_size)

(200840, 5)
(200840, 32332)
(180756, 5) (20084, 5)


In [25]:
model, model_file = create_model(args)
#if torch.cuda.device_count() > 1:
#    model = nn.DataParallel(model)
model = model.cuda()


model file: ./models/densenet201/best_model.pth, exist: True
loading ./models/densenet201/best_model.pth...


In [26]:
train(args)  # densenet201

{'recall': 0.979639, 'recall_grapheme': 0.968705, 'recall_vowel': 0.987912, 'recall_consonant': 0.993236, 'acc_grapheme': 0.966391, 'acc_vowel': 0.990191, 'acc_consonant': 0.989793, 'loss_grapheme': 0.266216, 'loss_vowel': 0.15312, 'loss_consonant': 0.12891}
    0 | 0.000020 | 153600/180756 | 4.5478 | 2.9927 |
val: {'recall': 0.97718, 'recall_grapheme': 0.965373, 'recall_vowel': 0.98753, 'recall_consonant': 0.990444, 'acc_grapheme': 0.964151, 'acc_vowel': 0.990141, 'acc_consonant': 0.988847, 'loss_grapheme': 0.406527, 'loss_vowel': 0.25841, 'loss_consonant': 0.183216}
    1 | 0.000020 | 126720/180756 | 3.9056 | 3.0462 |
val: {'recall': 0.977188, 'recall_grapheme': 0.96641, 'recall_vowel': 0.987693, 'recall_consonant': 0.988237, 'acc_grapheme': 0.964897, 'acc_vowel': 0.989594, 'acc_consonant': 0.989444, 'loss_grapheme': 0.410402, 'loss_vowel': 0.255828, 'loss_consonant': 0.185089}
    2 | 0.000019 | 099840/180756 | 0.0443 | 2.7690 |
val: {'recall': 0.977882, 'recall_grapheme': 0.967617,

   22 | 0.000020 | 023040/180756 | 2.6757 | 3.0263 |
val: {'recall': 0.978681, 'recall_grapheme': 0.966701, 'recall_vowel': 0.988151, 'recall_consonant': 0.99317, 'acc_grapheme': 0.965595, 'acc_vowel': 0.990191, 'acc_consonant': 0.989494, 'loss_grapheme': 0.304428, 'loss_vowel': 0.186286, 'loss_consonant': 0.160129}
   22 | 0.000019 | 176640/180756 | 0.0540 | 3.0258 |
val: {'recall': 0.977527, 'recall_grapheme': 0.968036, 'recall_vowel': 0.986764, 'recall_consonant': 0.987274, 'acc_grapheme': 0.96664, 'acc_vowel': 0.989843, 'acc_consonant': 0.989096, 'loss_grapheme': 0.255726, 'loss_vowel': 0.146429, 'loss_consonant': 0.133479}
   23 | 0.000017 | 149760/180756 | 2.6475 | 3.0810 |
val: {'recall': 0.978796, 'recall_grapheme': 0.96737, 'recall_vowel': 0.987882, 'recall_consonant': 0.99256, 'acc_grapheme': 0.966541, 'acc_vowel': 0.989892, 'acc_consonant': 0.988349, 'loss_grapheme': 0.321729, 'loss_vowel': 0.193959, 'loss_consonant': 0.162415}
   24 | 0.000015 | 122880/180756 | 2.1553 | 2.8

   44 | 0.000017 | 046080/180756 | 3.8302 | 2.8624 |
val: {'recall': 0.978227, 'recall_grapheme': 0.967463, 'recall_vowel': 0.987047, 'recall_consonant': 0.990933, 'acc_grapheme': 0.966192, 'acc_vowel': 0.990092, 'acc_consonant': 0.99039, 'loss_grapheme': 0.351208, 'loss_vowel': 0.218829, 'loss_consonant': 0.160556}
   45 | 0.000015 | 019200/180756 | 3.8934 | 3.0265 |
val: {'recall': 0.978613, 'recall_grapheme': 0.968133, 'recall_vowel': 0.987902, 'recall_consonant': 0.990285, 'acc_grapheme': 0.965644, 'acc_vowel': 0.990042, 'acc_consonant': 0.989594, 'loss_grapheme': 0.361923, 'loss_vowel': 0.234222, 'loss_consonant': 0.163663}
   45 | 0.000013 | 172800/180756 | 2.4299 | 2.8987 |
val: {'recall': 0.978477, 'recall_grapheme': 0.967841, 'recall_vowel': 0.987142, 'recall_consonant': 0.991083, 'acc_grapheme': 0.967038, 'acc_vowel': 0.989793, 'acc_consonant': 0.99049, 'loss_grapheme': 0.350092, 'loss_vowel': 0.211925, 'loss_consonant': 0.152599}
   46 | 0.000011 | 145920/180756 | 4.4946 | 2

KeyboardInterrupt: 

In [27]:
del model