In [1]:
import os
import gc
import re
import json
import glob
import math
import random
import pickle
from pathlib import Path
from datetime import datetime

import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import timm

import albumentations as A
from albumentations.pytorch import ToTensorV2


SEED = 5
BATCH_SIZE = 12
NUM_WORKERS = 6

LR = 6e-5

EPOCHS = 200

MODEL_NAME = 'nfnet_l2-2210'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pd.set_option('display.max_colwidth', 160)
print('DEVICE =', device)


cv2.setNumThreads(4)

DEVICE = cuda


In [2]:
df = pd.read_csv('train.csv')
for i in range(1, 9):
    df[f'sing{i}'] = df[f'sing{i}'].values.astype(int)
df.head(3)

Unnamed: 0,id,img,sing1,sing2,sing3,sing4,sing5,sing6,sing7,sing8
0,807,5-avi-frame24_jpg.rf.5dec372f9195e9a88ff7dd3bd443eab6.jpg,1,2,37,0,0,0,0,0
1,121,6-avi-frame6431_jpg.rf.1ad48ac0ce545b88cefb9463d1d3202d.jpg,8,0,0,0,0,0,0,0
2,1130,9-avi-frame1457_jpg.rf.634a979898a9caa4d106913089f0f45c.jpg,21,0,0,0,0,0,0,0


In [3]:
num_classes = df[df.columns[2:]].values.max( ) + 1
print(num_classes)

70


In [4]:
df['signs'] = (df[df.columns[2:]] > 0).sum(axis=1).values
df.tail(3)

Unnamed: 0,id,img,sing1,sing2,sing3,sing4,sing5,sing6,sing7,sing8,signs
775,873,6-avi-frame6490_jpg.rf.b19dac7915db559d41e7e27ea2dc5556.jpg,8,4,5,0,0,0,0,0,3
776,173,5-avi-frame4887_jpg.rf.9a87ce8aac43977d561d819eacbdec3f.jpg,38,51,0,0,0,0,0,0,2
777,1008,5-avi-frame3736_jpg.rf.042f819698bc45df385c923738ba083b.jpg,2,0,0,0,0,0,0,0,1


In [5]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.0)

img_model = timm.create_model('eca_nfnet_l2', pretrained=True)
in_features = img_model.head.fc.in_features
img_model.head.fc = nn.Sequential(nn.Dropout1d(0.15), nn.Linear(in_features, 1024))
in_features = 1024

img_model.head.fc.apply(init_weights)

Sequential(
  (0): Dropout1d(p=0.15, inplace=False)
  (1): Linear(in_features=3072, out_features=1024, bias=True)
)

Учимся через ArcFace на количество знаков. Через BCELoss - на мультилейбл

In [6]:
def seed_everything(seed: int):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark     = False


class ArcFaceClassifier(nn.Module):
    def __init__(self, in_features, output_classes, droput_p=0.25):
        super().__init__()
        self.in_features = in_features
        self.initial_layers=nn.Sequential(
            nn.BatchNorm1d(in_features),
            nn.Dropout(droput_p))
        self.W = nn.Parameter(torch.Tensor(in_features, output_classes))
        nn.init.kaiming_uniform_(self.W)
    def forward(self, x):
        x = self.initial_layers(x)
        x_norm = F.normalize(x)
        W_norm = F.normalize(self.W, dim=0)
        return x_norm @ W_norm


class ArcMarginProductLoss():
    def __init__(self, out_features, scale=30.0, margin=0.50, easy_margin=False, ls_eps=0.0):
        self.scale = scale
        self.margin = margin
        self.ls_eps = ls_eps  # label smoothing
        self.out_features=out_features
        self.easy_margin = easy_margin
        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

    def __call__(self, cosine, label):
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
    
        one_hot = torch.zeros(cosine.size(), device='cuda')
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.out_features

        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.scale
        return F.cross_entropy(output, label)


class Model(nn.Module):

    def __init__(self, 
                 img_model,
                 num_classes: int = num_classes,
                 in_features:int = in_features):
        
        super().__init__()
        self.in_features = in_features
        self.img_model = img_model
        self.signs       = ArcFaceClassifier(in_features, 8)
        self.cls         = nn.Sequential(nn.Dropout1d(0.15), nn.Linear(in_features, num_classes))
        
        self.signs.apply(init_weights)

    def forward(self, x):
        x = self.img_model(x)

        return {
            'signs': self.signs(x),
            'classes': self.cls(x),
            'emb': x
        }

seed_everything(SEED)
model = Model(img_model)

In [7]:
transform_train = A.Compose([
    A.RandomResizedCrop(320, 320, scale=(0.80, 1.0), ratio=(0.85, 1.15)),
    A.HorizontalFlip(p=0.5),
    A.ColorJitter(p=0.5),
    A.ImageCompression(quality_lower=50, quality_upper=100, p=0.3),
    A.OneOf([
        A.GaussNoise(var_limit=5, p=1.0),
        A.MedianBlur(blur_limit=3, p=1.0),
        A.Blur(blur_limit=3, p=1.0),
    ], p=0.2),
    A.ShiftScaleRotate(shift_limit  = 0.05,
                       scale_limit  = 0.10,
                       rotate_limit = 7.0,
                       p=0.3),
    A.OneOf([
        A.OpticalDistortion(p=1),
        A.Perspective(p=1),
        A.GridDistortion(p=1),
    ], p=0.2),
    A.OneOf([
        A.CLAHE(clip_limit=2, p=1),
        A.Sharpen(p=1),
        A.Emboss(p=1),
        A.RandomBrightnessContrast(p=1),            
    ], p=0.2),
    A.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

transform_val = A.Compose([
    A.Resize(320, 320),
    A.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]),
    ToTensorV2()
])

In [8]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, dataframe: pd.DataFrame,
                 transforms: A.Compose = None,
                 num_classes: int = num_classes,
                 mode: str = 'train'):

        self.num_classes = num_classes
        self.mode  = mode
        self.imgs  = dataframe['img'].apply(lambda x: os.path.join(mode, f'{x}')).values
        self.transforms = transforms
        if mode == 'train':
            self.signs = dataframe['signs'].values
            self.sign  = df[[f'sing{i}' for i in range(1, 9)]].values

    def __getitem__(self, idx) -> dict:

        image_path = self.imgs[idx]
        image = cv2.imread(image_path)
        if image is None:
            image = np.zeros((320, 320, 3), dtype='uint8')
        else:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        item = {'image': image}
        if self.transforms is not None:
            item = self.transforms(**item)
        
        if self.mode == 'train':
            item['trg_signs']   = torch.tensor(self.signs[idx] - 1).long()

            classes = np.zeros(self.num_classes)
            for x in self.sign[idx]:
                if x == 0:
                    break
                classes[x] = 1

            item['trg_classes']   = torch.tensor(classes).long()

        return item

    def __len__(self) -> int:
        return len(self.imgs)

dataset_train = Dataset(df, transforms=transform_train, mode='train')
dataset_valid = Dataset(df, transforms=transform_val, mode='train')

loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE,
                                           num_workers=NUM_WORKERS, shuffle=True,
                                           drop_last=True, pin_memory=True)

loader_valid = torch.utils.data.DataLoader(dataset_valid, batch_size=BATCH_SIZE,
                                           num_workers=NUM_WORKERS, shuffle=False,
                                           drop_last=False, pin_memory=True)

In [9]:
y_train = [x for x in df[df.columns[2:-1]].values.flatten() if x > 0]

weight = len(y_train) / (len(np.unique(y_train)) * np.bincount(y_train))
weight = torch.FloatTensor(weight)
weight = torch.nan_to_num(weight, posinf=1.0, neginf=1.0)
weight[0] = weight[0] / 8
weight = weight.cuda()
weight

  weight = len(y_train) / (len(np.unique(y_train)) * np.bincount(y_train))


tensor([ 0.1250,  0.1859,  0.3831,  4.2139,  0.1271,  0.1461,  1.4046,  1.0993,
         0.0854,  0.4597,  0.2940,  0.7023,  2.8093,  3.6119,  0.4682,  0.6020,
         0.5619,  2.2985,  4.2139,  3.6119,  5.0567,  2.5284,  2.8093, 12.6418,
         8.4279,  1.8060,  0.6833,  2.8093,  0.5267,  0.4958,  1.0535,  3.1604,
         2.2985, 12.6418,  6.3209,  0.9364, 12.6418,  0.7224,  6.3209,  8.4279,
         3.6119,  6.3209,  2.1070,  4.2139,  8.4279,  3.1604,  1.8060, 12.6418,
         8.4279,  2.8093, 25.2836,  2.8093, 12.6418,  1.0000,  8.4279,  6.3209,
        25.2836, 25.2836,  5.0567, 25.2836,  8.4279,  5.0567, 25.2836, 12.6418,
         1.0000, 25.2836, 25.2836, 25.2836, 25.2836, 25.2836], device='cuda:0')

In [10]:
model.cuda()

optimizer = torch.optim.AdamW(model.parameters(), lr=LR)

lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,
                                                   epochs=EPOCHS,
                                                   max_lr=LR,
                                                   div_factor=50.0,
                                                   final_div_factor=200.0,
                                                   steps_per_epoch=1)

critetion = nn.BCEWithLogitsLoss(weight)

best_cnt = 0
best_loss = 10.0

margin_cls = ArcMarginProductLoss(8, margin=0.6)

In [11]:
for epoch in range(EPOCHS):

    current_lr = optimizer.param_groups[0]['lr']
    
    print(f"Start epoch {epoch + 1} at {datetime.now().strftime('%H:%M:%S')}, lr={current_lr:0.6f}")

    loss_cls = []
    loss_sign = []
    model.train()
    optimizer.zero_grad()
    for batch in loader_train:
        batch = {k:v.cuda() for k, v in batch.items()}
        
        pred = model(batch['image'])
        
        cls_loss = critetion(pred['classes'], batch['trg_classes'].float())
        sign_loss = margin_cls(pred['signs'], batch['trg_signs'])
        
        loss_cls.append(cls_loss.item())
        loss_sign.append(sign_loss.item())
        
        loss = cls_loss * 0.50 + sign_loss * 0.50

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        optimizer.zero_grad()

    lr_scheduler.step()

    torch.cuda.empty_cache()
    model.eval()

    val_cls  = []
    val_sign = []
    pred_classes = []
    true_classes = []

    with torch.no_grad():
        for batch in loader_valid:
            batch = {k:v.cuda() for k, v in batch.items()}
            pred = model(batch['image'])
            cls_loss = critetion(pred['classes'], batch['trg_classes'].float())
            sign_loss = margin_cls(pred['signs'], batch['trg_signs'])
            
            val_cls.append(cls_loss.item())
            val_sign.append(sign_loss.item())
            
            for i, topk in enumerate(pred['signs'].argmax(dim=1).cpu().numpy()):
                values  = pred['classes'][i].topk(topk + 1).values.sigmoid()
                indices = pred['classes'][i].topk(topk + 1).indices

                pred_classes.append([indices[idx].item() for idx in range(len(values)) if values[idx] > 0.5])
                true_classes.extend(batch['trg_classes'].cpu().numpy().tolist())

    pred_val = []

    for i in range(len(pred_classes)):
        preds = set(pred_classes[i])
        trues = set(np.where(np.asarray(true_classes[i]) > 0)[0])
        pred_val.append((len(preds & trues) + 1) / (len(preds | trues) + 1))

    print(f"  train loss: cls = {min(9.9999, np.mean(loss_cls)):6.4f} signs = {min(9.9999, np.mean(loss_sign)):6.4f}")
    print(f"  valid loss: cls = {min(9.9999, np.mean(val_cls)):6.4f} signs = {min(9.9999, np.mean(val_sign)):6.4f} recall = {np.mean(pred_val):6.4f}")

    best_cnt += 1
    if np.mean(val_cls) < best_loss:
        best_cnt = 0
        best_loss = np.mean(val_cls)
        torch.save(model.state_dict(), f"{MODEL_NAME}.pth")
        print("Saved best model!")

Start epoch 1 at 11:30:37, lr=0.000001
  train loss: cls = 4.7646 signs = 9.9999
  valid loss: cls = 4.6629 signs = 9.9999 recall = 0.2151
Saved best model!
Start epoch 2 at 11:31:43, lr=0.000001
  train loss: cls = 4.5833 signs = 9.9999
  valid loss: cls = 4.4481 signs = 9.9999 recall = 0.2356
Saved best model!
Start epoch 3 at 11:32:48, lr=0.000001
  train loss: cls = 4.3451 signs = 9.9999
  valid loss: cls = 4.1946 signs = 9.9999 recall = 0.2461
Saved best model!
Start epoch 4 at 11:33:54, lr=0.000002
  train loss: cls = 4.0700 signs = 9.9999
  valid loss: cls = 3.8580 signs = 9.9999 recall = 0.2588
Saved best model!
Start epoch 5 at 11:35:00, lr=0.000002
  train loss: cls = 3.7414 signs = 9.9999
  valid loss: cls = 3.4153 signs = 9.9999 recall = 0.2670
Saved best model!
Start epoch 6 at 11:36:05, lr=0.000002
  train loss: cls = 3.2763 signs = 9.9999
  valid loss: cls = 2.7747 signs = 9.9999 recall = 0.2725
Saved best model!
Start epoch 7 at 11:37:11, lr=0.000003
  train loss: cls =

In [12]:
model.load_state_dict(torch.load(f"{MODEL_NAME}.pth", map_location=device))

df = pd.read_csv('test.csv')
df.head()

Unnamed: 0,id,img
0,754,6-avi-frame14887_jpg.rf.bb0bf6b4b122c23e1b33a9e05d006b06.jpg
1,29,5-avi-frame2916_jpg.rf.1ecdbbc129d33896fd25b9b8dfdbfc43.jpg
2,1157,5-avi-frame2871_jpg.rf.f73998176f8a19ee03f870416674385b.jpg
3,1049,6-avi-frame5752_jpg.rf.a067b0fc55b770c9b10bb7a822425229.jpg
4,715,6-avi-frame5678_jpg.rf.f140419d224703d49fe65db505e7f1ce.jpg


In [13]:
# 0.376580

dataset_test = Dataset(df, transforms=transform_val, mode='test')

loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=BATCH_SIZE,
                                           num_workers=NUM_WORKERS, shuffle=False,
                                           drop_last=False, pin_memory=True)

torch.cuda.empty_cache()
model.eval()

pred_classes = []

with torch.no_grad():
    for batch in loader_test:
        batch = {k:v.cuda() for k, v in batch.items()}
        pred = model(batch['image'])
        for i, topk in enumerate(pred['signs'].argmax(dim=1).cpu().numpy()):
            topk = max(1, topk)
            values  = pred['classes'][i].topk(topk).values.sigmoid()
            indices = pred['classes'][i].topk(topk).indices
            pred_classes.append([indices[idx] for idx in range(len(values)) if values[idx] > 0.2])

signs = np.zeros((len(pred_classes), 8), dtype=int)
for r, pred in enumerate(pred_classes):
    for i, k in enumerate(pred):
        signs[r][i] = k

df_cls = pd.DataFrame(signs, columns=[f'sing{i}' for i in range(1, 9)])
df_out = pd.merge(df[['id']], df_cls, how='inner', left_index=True, right_index=True)

df_out.to_csv(f'{MODEL_NAME}.csv', index=False)
df_out.head(3)

Unnamed: 0,id,sing1,sing2,sing3,sing4,sing5,sing6,sing7,sing8
0,754,22,0,0,0,0,0,0,0
1,29,8,0,0,0,0,0,0,0
2,1157,8,0,0,0,0,0,0,0
