In [None]:
# Notebook shell magics: quietly install timm (used by Net to build the
# backbone) and PyFstat from the `python37` branch of its GitHub repository.
# NOTE(review): PyFstat is not referenced by any code visible in this notebook.
!pip install -q timm
!pip install -q git+https://github.com/PyFstat/PyFstat@python37

[0m

In [None]:
# import wandb
import os, gc, re
from collections import defaultdict
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np, pandas as pd
import skimage
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from PIL import Image
from skimage import io
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from tqdm.auto import tqdm

# Device string shared by the model and the training/evaluation loops:
# 'cuda' when a CUDA device is available, 'cpu' otherwise.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

class CFG:
    """Experiment configuration, read as class attributes (never instantiated)."""

    # --- bookkeeping -------------------------------------------------------
    competition = 'G2Net'
    wandb = False            # not read by the code in this notebook
    seed = 13                # random_state of the KFold splitter

    # --- model -------------------------------------------------------------
    model = 'inception_v4'   # timm architecture name used by Net
    hog = True               # append HOG-histogram features to the backbone output
    dropout = 0.25           # only referenced by a commented-out model constructor

    # --- synthetic data generation -----------------------------------------
    positive_rate = 0.5      # probability that a sample contains a signal
    # Bounds of the uniform signal-strength draw.
    # NOTE(review): signal_low is larger than signal_high; confirm the intended
    # ordering, since np.random.uniform expects low <= high.
    signal_low = 0.2
    signal_high = 0.1
    gaussian_noise = 2.      # scale of the Gaussian noise added to training images

    # --- optimisation ------------------------------------------------------
    folds = 10               # number of KFold splits
    epochs = 3
    batch = 32               # DataLoader batch size
    lr = 0.00056             # Adam learning rate and OneCycleLR max_lr
    max_grad_norm = 1.36     # gradient-norm clipping threshold
    apex = False             # passed as `enabled` to torch.autocast
    one_cycle = True         # use the OneCycleLR scheduler
    one_cycle_pct_start = 0.1


# class CFG:
#     wandb=False
#     competition='G2Net'
#     model='tf_efficientnetv2_m'
#     apex=False
#     max_grad_norm=1.36
#     seed=13
#     positive_rate=0.5
#     signal_low=0.2
#     signal_high=0.1
#     folds=10
#     lr=0.00046
#     dropout=0.25
#     epochs=2
#     gaussian_noise=2.
#     one_cycle_pct_start=0.3
#     one_cycle=True
#     batch=16
#     hog=True

In [None]:
def get_transforms():
    """Return the image preprocessing pipeline.

    The pipeline converts the input to a tensor with ``ToTensor`` and then
    normalises it with mean 0.5 and standard deviation 0.1.
    """
    to_tensor = torchvision.transforms.ToTensor()
    normalize = torchvision.transforms.Normalize(mean=0.5, std=0.1)
    return torchvision.transforms.Compose([to_tensor, normalize])


class G2Net_Dataset(torch.utils.data.Dataset):
    """Dataset that synthesises samples by overlaying signal images on noise.

    Every access draws a fresh random sample: ``__getitem__`` ignores its
    index, picks a random row of ``df_noise`` and, with probability
    ``positive_rate``, a random row of ``df_signal`` whose images are blended
    into the noise images with a random strength.

    Args:
        df_noise: Frame whose ``files`` column holds, per row, a sequence of
            noise image paths (presumably one per input channel -- confirm
            against the code that builds the frame).
        df_signal: Frame with the same ``files`` layout for signal images.
            Its length is also what ``__len__`` reports.
        positive_rate: Probability that a sample contains a signal (label 1).
        gaussian_noise: Scale of the extra Gaussian noise added to each image
            when ``is_train`` is true; values <= 0 disable it.
        signal_low: One bound of the uniform signal-strength distribution.
        signal_high: The other bound of that distribution.
        is_train: Enables the Gaussian-noise augmentation.
    """

    def __init__(
        self,
        df_noise: pd.DataFrame,
        df_signal: pd.DataFrame,
        positive_rate: float = CFG.positive_rate,
        gaussian_noise: float = CFG.gaussian_noise,
        signal_low: float = CFG.signal_low,
        signal_high: float = CFG.signal_high,
        is_train: bool = False
    ) -> None:
        self.df_noise = df_noise
        self.df_signal = df_signal
        self.positive_rate = positive_rate
        self.gaussian_noise = gaussian_noise
        self.signal_low = signal_low
        self.signal_high = signal_high
        self.transforms = get_transforms()
        self.is_train = is_train

    def __len__(self):
        """Return the nominal epoch size (number of signal rows)."""
        return len(self.df_signal)

    def gen_sample(self, signal, noise, signal_strength):
        """Build one transformed image from a noise file and an optional signal.

        Args:
            signal: Path of the signal image, or a falsy value for "no signal".
            noise: Path of the noise image.
            signal_strength: Multiplier applied to the signal before it is
                added to the noise.

        Returns:
            The result of applying ``self.transforms`` to the blended image.
        """
        noise = np.array(Image.open(noise))
        if signal:
            signal = np.array(Image.open(signal))
            noise = noise + signal_strength * signal
        if self.is_train and self.gaussian_noise > 0:
            # Use the instance setting; the original referenced an undefined
            # global name here.
            noise = noise + np.random.randn(*noise.shape) * self.gaussian_noise
        # Back to 8-bit so the transform pipeline sees an ordinary image array.
        noise = np.clip(noise, 0, 255).astype(np.uint8)
        return self.transforms(noise)

    def __getitem__(self, index):
        """Return a random ``(image_stack, label)`` pair; ``index`` is unused."""
        noise_files = self.df_noise.sample().files.values[0]
        sig_files = [None, None]
        label = 0.
        if np.random.random() < self.positive_rate:
            sig_files = self.df_signal.sample().files.values[0]
            label = 1.
        # np.random.uniform is only defined for low <= high, so order the
        # bounds explicitly instead of relying on how they were configured.
        low, high = sorted((self.signal_low, self.signal_high))
        signal_strength = np.random.uniform(low, high)
        # Stack the per-file images along the first (channel) axis.
        return np.concatenate(
            [
                self.gen_sample(sig, noise, signal_strength)
                for sig, noise in zip(sig_files, noise_files)
            ],
            axis=0
        ), label

In [None]:
def net_hog_features(img: torch.Tensor, dim: int = 257) -> np.ndarray:
    """Return a fixed-length histogram of HOG descriptor values for one image.

    Args:
        img: Image tensor laid out channels-first; it is moved to the CPU and
            transposed to channels-last before the HOG computation.
        dim: Number of histogram bin *edges* spread evenly over [0, 1]; the
            returned histogram therefore has ``dim - 1`` entries.

    Returns:
        Integer counts of HOG descriptor values per bin, length ``dim - 1``.
    """
    # Import the submodule explicitly: `import skimage` alone does not
    # guarantee that `skimage.feature` is loaded.
    from skimage.feature import hog

    # detach() keeps this working even if the tensor is tracked by autograd.
    img = np.transpose(img.detach().cpu().numpy(), (1, 2, 0))
    bins = np.linspace(0, 1, dim)
    hog_kwargs = dict(
        orientations=8, pixels_per_cell=(16, 16),
        cells_per_block=(3, 3), visualize=False
    )
    try:
        # Current scikit-image spelling for "last axis is the channel axis".
        fd = hog(img, channel_axis=-1, **hog_kwargs)
    except TypeError:
        # Older scikit-image releases only know the `multichannel` flag.
        fd = hog(img, multichannel=True, **hog_kwargs)
    counts, _ = np.histogram(fd, bins=bins)
    return counts


# -> CONV/FC -> BatchNorm -> ReLu(or other activation) -> Dropout -> CONV/FC ->
class Net(nn.Module):
    """timm backbone with an optional HOG-histogram side branch.

    The backbone output is fed to a single linear unit producing one logit.
    When ``hog`` is true, a ``dim``-long HOG histogram of every input image is
    L2-normalised and concatenated with the L2-normalised backbone output
    before that linear unit.

    Args:
        name_model: timm architecture name.
        dim: Length of the HOG histogram appended per sample.
        hog: Whether to use the HOG side branch.
    """

    def __init__(
        self,
        name_model: str = CFG.model,
        dim: int = 256,
        hog: bool = CFG.hog
    ):
        super().__init__()
        self.model = timm.create_model(
            name_model,
            in_chans=2,
            pretrained=True
        )
        self.dim = dim
        self.hog = hog
        # The head assumes the backbone emits 1000 values per sample
        # (no num_classes override is passed above) -- TODO confirm for
        # architectures other than the configured one.
        head_inputs = 1000 + self.dim if self.hog else 1000
        self.s = nn.Linear(head_inputs, 1)

    def forward(self, x):
        """Return one logit per sample, shape ``(batch, 1)``."""
        x1 = self.model(x)
        if not self.hog:
            return self.s(x1)
        # HOG is computed on the CPU, one image at a time; dim + 1 bin edges
        # give exactly `dim` histogram entries per image.
        hog_rows = [net_hog_features(sample, self.dim + 1) for sample in x]
        # Put the features on the same device as the backbone output instead
        # of a global device, so the model also works when moved elsewhere.
        xx = torch.as_tensor(
            np.stack(hog_rows), dtype=torch.float, device=x1.device
        )
        xx = nn.functional.normalize(xx, p=2.0, dim=1)
        x1 = nn.functional.normalize(x1, p=2.0, dim=1)
        x3 = torch.cat((x1, xx), dim=1)
        return self.s(x3)


def train(
    model: nn.Module,
    loader: torch.utils.data.DataLoader,
    optimizer: Optional[torch.optim.Optimizer],
    scheduler: Optional[torch.optim.lr_scheduler.LambdaLR],
    epoch: int
) -> None:
    """Run one training epoch with BCE-with-logits loss and gradient clipping.

    Args:
        model: Network to train; it must already live on ``DEVICE``.
        loader: Yields ``(inputs, targets)`` batches.
        optimizer: Optimizer stepped once per batch. Despite the ``Optional``
            annotation it is used unconditionally and must not be ``None``.
        scheduler: Optional learning-rate scheduler, stepped once per batch.
        epoch: Zero-based epoch index, used only for the progress-bar label.
    """
    model.train()
    pbar = tqdm(
        loader,
        desc=f"Model Train, epoch: {epoch+1} ",
        total=len(loader),
        mininterval= len(loader)//20
    )
    for X, y in pbar:
        optimizer.zero_grad()
        # torch.autocast requires the device type as its first argument.
        with torch.autocast(device_type=DEVICE, enabled=CFG.apex):
            y_ = model(X.to(DEVICE))
            # reshape(-1) rather than squeeze() so a batch of one sample keeps
            # its batch dimension; float() aligns the target dtype with the
            # logits (default collation of Python floats yields float64).
            loss = torch.nn.functional.binary_cross_entropy_with_logits(
                y_.reshape(-1), y.to(DEVICE).float().reshape(-1)
            )
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        optimizer.step()
        if scheduler is not None:
            scheduler.step()


@torch.no_grad()
def evaluate(
    model: nn.Module,
    loader: torch.utils.data.DataLoader,
    epoch: int
) -> tuple:
    """Score the model on a validation loader.

    Args:
        model: Network to evaluate; it is moved to ``DEVICE`` and switched to
            eval mode.
        loader: Yields ``(inputs, targets)`` batches.
        epoch: Zero-based epoch index, used only for the progress-bar label.

    Returns:
        ``(auc, loss)`` where ``auc`` is the ROC AUC of the sigmoid outputs
        and ``loss`` is the *median* per-sample BCE-with-logits loss.
    """
    model.to(DEVICE)
    model.eval()
    pred = []
    target = []
    pbar = tqdm(
        loader,
        desc=f"Valid, epoch: {epoch+1} ",
        total=len(loader),
        mininterval= len(loader)//20
    )
    for X, y in pbar:
        # torch.autocast requires the device type as its first argument.
        with torch.autocast(device_type=DEVICE, enabled=CFG.apex):
            y_ = model(X.to(DEVICE))
        # reshape(-1) keeps a 1-D tensor even for a batch of one sample, so
        # the concatenation below cannot hit a 0-dim tensor; float() gives
        # predictions and targets a common dtype for the loss.
        pred.append(y_.float().cpu().reshape(-1))
        target.append(y.float().reshape(-1))
    pred = torch.cat(pred)
    target = torch.cat(target)
    loss = torch.nn.functional.binary_cross_entropy_with_logits(
        pred, target, reduction='none'
    ).median().item()
    # Hand plain NumPy arrays to scikit-learn rather than tensors.
    auc = roc_auc_score(target.numpy(), torch.sigmoid(pred).numpy())
    return auc, loss
    
    
def run_training(dim: int, fold: int = 0) -> None:
    """Train and validate one cross-validation fold, saving the best weights.

    The module-level ``df_noise`` and ``df_signal`` frames are each split with
    the same seeded KFold; the selected fold's held-out rows are used for
    validation. After every epoch the weights are written to
    ``model-f{fold}.tph`` if the validation AUC improved.

    Args:
        dim: Length of the HOG histogram used by ``Net``.
        fold: Zero-based index of the fold to hold out.

    Raises:
        ValueError: If ``fold`` is outside ``[0, CFG.folds)``.
    """
    if not 0 <= fold < CFG.folds:
        raise ValueError(f'fold must be in [0, {CFG.folds}), got {fold}')

    kfold = KFold(CFG.folds, shuffle=True, random_state=CFG.seed)
    # KFold yields positional indices, hence iloc rather than loc.
    tr_idx, vl_idx = list(kfold.split(df_noise))[fold]
    tr_noise, vl_noise = df_noise.iloc[tr_idx], df_noise.iloc[vl_idx]
    tr_idx, vl_idx = list(kfold.split(df_signal))[fold]
    tr_signal, vl_signal = df_signal.iloc[tr_idx], df_signal.iloc[vl_idx]

    tr_data = G2Net_Dataset(
        tr_noise,
        tr_signal,
        is_train=True
    )
    vl_data = G2Net_Dataset(
        vl_noise,
        vl_signal
    )
    # os.cpu_count() may return None; fall back to loading in the main process.
    workers = os.cpu_count() or 0
    tr_loader = torch.utils.data.DataLoader(
        tr_data,
        batch_size=CFG.batch,
        num_workers=workers,
        pin_memory=True
    )
    vl_loader = torch.utils.data.DataLoader(
        vl_data,
        batch_size=CFG.batch,
        num_workers=workers,
        pin_memory=True
    )
    # model = timm.create_model(CFG.model, pretrained=True, num_classes=1, in_chans=2, drop_rate=CFG.dropout)
    # print('Load model: ',  model.__class__.__name__)
    # Pass dim by keyword: Net's first positional parameter is the model name.
    model = Net(dim=dim)
    model.to(DEVICE)
    optim = torch.optim.Adam(model.parameters(), lr=CFG.lr)
    scheduler = None
    if CFG.one_cycle:
        # The scheduler is stepped once per batch, so it spans every batch of
        # every epoch.
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer=optim,
            max_lr=CFG.lr,
            total_steps=int(len(tr_loader) * CFG.epochs),
            pct_start=CFG.one_cycle_pct_start
        )
    max_auc = 0
    for epoch in range(CFG.epochs):
        train(model, tr_loader, optim, scheduler, epoch)
        auc, loss = evaluate(model, vl_loader, epoch)
        if auc > max_auc:
            torch.save(model.state_dict(), f'model-f{fold}.tph')
            max_auc = auc
        print(f'val_loss: {loss}, val_auc: {auc}, val_max_auc: {max_auc}')
    # Drop the large objects before the next fold builds its own.
    del model, optim, scheduler, tr_loader, vl_loader, tr_data, vl_data
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
# Train every cross-validation fold in turn with a 16-bin HOG histogram.
DIM = 16
# run_training takes (dim, fold); the fold indices are 0 .. CFG.folds - 1.
for fold in range(CFG.folds):
    print('------------', fold, '_____________________')
    run_training(DIM, fold=fold)

------------ 0 _____________________


Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-cadene/inceptionv4-8e4777a0.pth" to /root/.cache/torch/hub/checkpoints/inceptionv4-8e4777a0.pth


Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:52<00:00,  1.53s/it]
Eval: 100%|██████████| 30/30 [00:31<00:00,  1.05s/it]


val_loss: 0.21725983917713165, val_auc: 0.8558359013867488, val_max_auc: 0.8558359013867488


  import sys
Train: 100%|██████████| 269/269 [06:44<00:00,  1.50s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.02s/it]


val_loss: 0.6957787275314331, val_auc: 0.5291485021972442, val_max_auc: 0.8558359013867488


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.02s/it]


val_loss: 0.6657996773719788, val_auc: 0.5490073762777157, val_max_auc: 0.8558359013867488
------------ 1 _____________________
Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:43<00:00,  1.50s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.02s/it]


val_loss: 0.1641308069229126, val_auc: 0.8417407900266144, val_max_auc: 0.8417407900266144


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.7013483047485352, val_auc: 0.4991443452380952, val_max_auc: 0.8417407900266144


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.6905502676963806, val_auc: 0.5120118345785025, val_max_auc: 0.8417407900266144
------------ 2 _____________________
Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:42<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.23779955506324768, val_auc: 0.789041696826128, val_max_auc: 0.789041696826128


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.10983604192733765, val_auc: 0.8724002100840336, val_max_auc: 0.8724002100840336


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.00s/it]


val_loss: 0.10265915095806122, val_auc: 0.891774626427849, val_max_auc: 0.891774626427849
------------ 3 _____________________
Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.2430133819580078, val_auc: 0.8843919476279902, val_max_auc: 0.8843919476279902


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.6686687469482422, val_auc: 0.5104028050147079, val_max_auc: 0.8843919476279902


  import sys
Train: 100%|██████████| 269/269 [06:39<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.683753490447998, val_auc: 0.48688359463957254, val_max_auc: 0.8843919476279902
------------ 4 _____________________
Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:29<00:00,  1.00it/s]


val_loss: 0.37494340538978577, val_auc: 0.8557115281078881, val_max_auc: 0.8557115281078881


  import sys
Train: 100%|██████████| 269/269 [06:39<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.02s/it]


val_loss: 0.1871296465396881, val_auc: 0.8793370243245968, val_max_auc: 0.8793370243245968


  import sys
Train: 100%|██████████| 269/269 [06:38<00:00,  1.48s/it]
Eval: 100%|██████████| 30/30 [00:29<00:00,  1.00it/s]


val_loss: 0.14313378930091858, val_auc: 0.8973395368417361, val_max_auc: 0.8973395368417361
------------ 5 _____________________
Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.31964579224586487, val_auc: 0.8653977781287843, val_max_auc: 0.8653977781287843


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.7051953077316284, val_auc: 0.5095533553355336, val_max_auc: 0.8653977781287843


  import sys
Train: 100%|██████████| 269/269 [06:39<00:00,  1.48s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.7211339473724365, val_auc: 0.48037005791217385, val_max_auc: 0.8653977781287843
------------ 6 _____________________
Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.17675276100635529, val_auc: 0.8283587990827059, val_max_auc: 0.8283587990827059


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.01s/it]


val_loss: 0.6578006744384766, val_auc: 0.5117366361993649, val_max_auc: 0.8283587990827059


  import sys
Train: 100%|██████████| 269/269 [06:39<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:29<00:00,  1.01it/s]


val_loss: 0.6963649988174438, val_auc: 0.47245561818469156, val_max_auc: 0.8283587990827059
------------ 7 _____________________
Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.00s/it]


val_loss: 0.2635616660118103, val_auc: 0.8474509803921568, val_max_auc: 0.8474509803921568


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.02s/it]


val_loss: 0.15527327358722687, val_auc: 0.8878611296380805, val_max_auc: 0.8878611296380805


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:29<00:00,  1.00it/s]


val_loss: 0.11922501027584076, val_auc: 0.89480915092352, val_max_auc: 0.89480915092352
------------ 8 _____________________
Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.00s/it]


val_loss: 0.2772250175476074, val_auc: 0.8798715823267809, val_max_auc: 0.8798715823267809


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.00s/it]


val_loss: 0.6681984663009644, val_auc: 0.7551916092014528, val_max_auc: 0.8798715823267809


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:29<00:00,  1.01it/s]


val_loss: 0.6900312304496765, val_auc: 0.787805595043788, val_max_auc: 0.8798715823267809
------------ 9 _____________________
Take version:  Linear(in_features=1016, out_features=1, bias=True) select:  [False, False, False, True]


  import sys
Train: 100%|██████████| 269/269 [06:42<00:00,  1.50s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.00s/it]


val_loss: 0.371162474155426, val_auc: 0.8470442204619419, val_max_auc: 0.8470442204619419


  import sys
Train: 100%|██████████| 269/269 [06:40<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.00s/it]


val_loss: 0.6969045996665955, val_auc: 0.5103554970375247, val_max_auc: 0.8470442204619419


  import sys
Train: 100%|██████████| 269/269 [06:41<00:00,  1.49s/it]
Eval: 100%|██████████| 30/30 [00:30<00:00,  1.00s/it]

val_loss: 0.679486870765686, val_auc: 0.49686160306416477, val_max_auc: 0.8470442204619419



