## Определяем среду исполнения, для коллаба работаем с данными на Google Drive

Данный ноутбук исполнялся на домашнем компьютере, поэтому все исходные изображения должны быть помещены в подкаталог train и test соответственно, табличные данные - в текущий каталог.

In [1]:
try:
    import google.colab
    IN_COLAB = True
except:
    IN_COLAB = False

if IN_COLAB:
    num_workers = 2
    BATCHSIZE = 32
    ROOT = '/content/'
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    !cp /content/drive/MyDrive/2022_udm.zip /content/2022_udm.zip
    !unzip -q /content/2022_udm.zip -d /content/

    !python -m pip install --upgrade pip
    !pip install -U timm albumentations opencv-contrib-python pytorch_metric_learning
else:
    num_workers = 4
    ROOT = './'
    BATCHSIZE = 16

!nvidia-smi

Tue Jul 12 23:00:06 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.39.01    Driver Version: 510.39.01    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |
|  0%   54C    P0    61W / 200W |     19MiB / 11264MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Определяем основные библиотеки, загружаем предобученную модель

In [2]:
import os
import gc
import glob
import sys
import time
import random
from datetime import datetime
from pathlib import Path

import cv2
import numpy as np
import pandas as pd
from IPython.display import clear_output
from sklearn.metrics import r2_score

import torch
import torch.nn as nn
from torch.utils.data import Dataset

import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from pytorch_metric_learning.losses import ArcFaceLoss

TIMMMODEL = 'tf_efficientnetv2_m_in21ft1k'
SIZE = 384

SEED = 111
DEVICE = 'cuda'
VERSION = '1207'
EPOCHS = 200
EMB_FEATURES = 256

scaler = torch.cuda.amp.GradScaler()
model = timm.create_model(TIMMMODEL, pretrained=True)

In [3]:
model.default_cfg

{'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_m_21ft1k-bf41664a.pth',
 'num_classes': 1000,
 'input_size': (3, 384, 384),
 'pool_size': (12, 12),
 'crop_pct': 1.0,
 'interpolation': 'bicubic',
 'mean': (0.5, 0.5, 0.5),
 'std': (0.5, 0.5, 0.5),
 'first_conv': 'conv_stem',
 'classifier': 'classifier',
 'test_input_size': (3, 480, 480),
 'architecture': 'tf_efficientnetv2_m_in21ft1k'}

## Зафиксируем ГСЧ

In [4]:
def seed_everything(seed=1234):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark     = False

seed_everything(SEED)

## Определим класс датасета и аугментации

In [5]:
class ImageDataset(Dataset):
    def __init__(self, data_df, transform=None):
        self.data_df = data_df
        self.transform = transform

    def __getitem__(self, idx):
        image_name = self.data_df.iloc[idx]['img_num']

        image = cv2.imread(f"{ROOT}train/{image_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        if self.transform:
            image = self.transform(image=image)['image']

        label = self.data_df.iloc[idx]['number_of_houses']

        return image, torch.tensor(min(label, 26)).long()
    
    def __len__(self):
        return len(self.data_df)


transform_train = A.Compose([
    A.Resize(SIZE, SIZE),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.GaussNoise(p=0.2),
    A.OneOf([
        A.MotionBlur(p=0.2),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
    ], p=0.2),
    A.RandomRotate90(p=0.5),
    A.OneOf([
      A.RandomFog(fog_coef_lower=0.1, fog_coef_upper=0.4, p=0.2),
      A.RandomShadow(p=0.2),
    ], p=0.3),
    A.OneOf([
        A.OpticalDistortion(p=0.3),
        A.GridDistortion(p=.1),
        A.PiecewiseAffine(p=0.3),
    ], p=0.2),
    A.OneOf([
        A.CLAHE(clip_limit=2),
        A.Sharpen(),
        A.Emboss(),
        A.RandomBrightnessContrast(),            
    ], p=0.3),
    A.HueSaturationValue(p=0.3),
    A.Normalize(mean= model.default_cfg['mean'],
                std = model.default_cfg['std']),
    ToTensorV2()
])

transform_val = A.Compose([
    A.Resize(SIZE, SIZE),
    A.Normalize(mean= model.default_cfg['mean'],
                std = model.default_cfg['std']),
    ToTensorV2()
])

## Отбросим все изображения, где черного фона более 25% или где количество сооружений больше 26.

In [6]:
ignore_list = []
data_df = pd.read_csv(f'{ROOT}train.csv')
for i in range(len(data_df)):
    img = cv2.imread('train/' + data_df.iloc[i]['img_num'])
    mask = (img[:, :, 0] < 5).sum() / (img.shape[0] * img.shape[1])
    if mask > 0.25:
        ignore_list.append(data_df.iloc[i]['img_num'])

data_df = data_df[~data_df['img_num'].isin(ignore_list)].copy()
data_df = data_df[data_df['number_of_houses'] < 27].copy()

data_df['number_of_houses'].value_counts()

1     200
10    139
2     139
3     137
4     134
8     133
11    133
6     126
5     116
9     113
7     110
13    108
14    103
12     94
15     85
16     63
17     40
18     31
19     21
20     11
21     11
22     10
24      4
23      4
25      4
Name: number_of_houses, dtype: int64

## Инициализируем даталоадеры

In [7]:
dataset_train = ImageDataset(data_df, transform_train)
dataset_val   = ImageDataset(data_df, transform_val)

loader_train = torch.utils.data.DataLoader(dataset=dataset_train,
                                           batch_size=BATCHSIZE,
                                           shuffle=True,
                                           pin_memory=True,
                                           num_workers=num_workers)
loader_val   = torch.utils.data.DataLoader(dataset=dataset_val,
                                           batch_size=BATCHSIZE * 2,
                                           shuffle=False,
                                           pin_memory=True,
                                           drop_last=False,
                                           num_workers=num_workers)

## Создадим требуемые нам головы модели

In [8]:
class Head(nn.Module):
    def __init__(self, in_features, emb_features:int=512):
        super().__init__()

        self.in_features = in_features
        self.emb_features = emb_features
        
        self.dropout = nn.Dropout(0.1)
        self.out = nn.Sequential(
            nn.Linear(self.in_features, self.emb_features, bias=False),
            nn.BatchNorm1d(self.emb_features),
            nn.ReLU(inplace=True),
            nn.Linear(self.emb_features, 1)
        )
        self.neck = nn.Sequential(
            nn.BatchNorm1d(self.in_features),
            nn.Linear(self.in_features, self.emb_features, bias=False),
            nn.ReLU(inplace=True),
            nn.BatchNorm1d(self.emb_features),
            nn.Linear(self.emb_features, self.emb_features, bias=False),
            nn.BatchNorm1d(self.emb_features)
        )

        torch.nn.init.xavier_normal_(self.neck[1].weight)
        torch.nn.init.xavier_normal_(self.neck[4].weight)

        torch.nn.init.xavier_normal_(self.out[0].weight)
        torch.nn.init.xavier_normal_(self.out[3].weight)
        torch.nn.init.zeros_(self.out[3].bias)
        
        
    def forward(self, features):
        x = self.dropout(features)
        x_ = self.neck(x)
        x = self.out(x)
        
        return x, x_


head_name = model.default_cfg['classifier']


# Различные модели имеют различный выходной слой классификатора
if head_name == 'last_linear':
    model.last_linear = Head(model.last_linear.in_features, emb_features=EMB_FEATURES)
elif head_name == 'head':
    model.head = Head(model.head.in_features, emb_features=EMB_FEATURES)
elif head_name == 'fc':
    model.fc = Head(model.fc.in_features, emb_features=EMB_FEATURES)
elif head_name == 'classifier':
    model.classifier = Head(model.classifier.in_features, emb_features=EMB_FEATURES)


_ = model.to(DEVICE)

## Основной цикл обучения и сохранения весов модели

In [9]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 
                                                                 T_0=10, 
                                                                 T_mult=1, 
                                                                 eta_min=1e-6,
                                                                 last_epoch=-1)      

criterion = torch.nn.MSELoss()
criterion_ = ArcFaceLoss(28, EMB_FEATURES).to(DEVICE)

start_time = time.time()
for epoch in range(EPOCHS):

    losses  = []
    losses_ = []
    model.train()
    start_date = datetime.now().strftime("%H:%M:%S")
    print(f'{start_date} start train {epoch + 1}/{EPOCHS} epoch')
    for it, (imgs, labels) in enumerate(loader_train, start = 1):
        if it % 100 == 0:
            print(f'{datetime.now().strftime("%H:%M:%S")} iter {it:5d}')
        
        optimizer.zero_grad()

        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)

        with torch.cuda.amp.autocast(enabled=True):
            y_pred, y_pred_ = model(imgs)
            loss = criterion(y_pred, labels.float().unsqueeze(1))
            loss_ = criterion_(y_pred_, labels)
            
        scaler.scale(loss * 0.8 + loss_ * 0.2).backward()
        losses.append(loss.item())
        losses_.append(loss_.item())
        
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()
    scheduler.step()

    torch.cuda.empty_cache()
    model.eval()
    print(f'{datetime.now().strftime("%H:%M:%S")} start validate')
    for it, (imgs, labels) in enumerate(loader_val, start = 1):
        if it % 100 == 0:
            print(f'{datetime.now().strftime("%H:%M:%S")} iter {it:5d}')
        
        imgs = imgs.to(DEVICE)
        labels = labels.to(DEVICE)

        valid_labels = []
        valid_predicts = []
        with torch.no_grad():
            y_pred, _ = model(imgs)

        pred = torch.clip(y_pred, 1.0, 26.0)
        pred = torch.ceil(pred)

        pred_numpy = pred.cpu().numpy().astype('int').flatten()
        valid_predicts.extend(pred_numpy.tolist())
        valid_labels.extend(labels.cpu().numpy().astype('int').flatten().tolist())

    val_r2 = r2_score(valid_labels, valid_predicts)
        

    if IN_COLAB:
        torch.save(model.state_dict(), f"/content/drive/MyDrive/chkp/{VERSION}_{TIMMMODEL}_arc.pth")
        if val_r2 > 0.90:
            torch.save(model.state_dict(), f"/content/drive/MyDrive/chkp/{VERSION}_{TIMMMODEL}_arc_{epoch:03d}_r2-{val_r2:.4f}.pth")
    else:
        torch.save(model.state_dict(), f"{VERSION}_{TIMMMODEL}_arc.pth")
        if val_r2 > 0.90:
            torch.save(model.state_dict(), f"{VERSION}_{TIMMMODEL}_arc_{epoch:03d}_r2-{val_r2:.4f}.pth")

    clear_output()

    print(f'{start_date} - {datetime.now().strftime("%H:%M:%S")} {epoch + 1}/{EPOCHS} epoch cls {np.mean(losses):.4f} arc {np.mean(losses_):.4f} validate R2 {val_r2:.4f}')
    torch.cuda.empty_cache()

print(f"total {time.time() - start_time:.0f} seconds")

06:13:56 - 06:16:05 200/200 epoch cls 0.3210 arc 1.1238 validate R2 0.9967
total 26149 seconds
