In [1]:
from pathlib import Path
import random
from pprint import pprint

import cv2
import albumentations
import numpy as np
import pandas as pd
import time
import os
import matplotlib.pyplot as plt
# These transformations will be passed to our model class
import torch
import yaml
from tqdm.auto import tqdm
from types import SimpleNamespace
import albumentations as A
from sklearn.preprocessing import LabelEncoder
from torch.nn.parameter import Parameter
import torch.nn as nn
import math
import torch.nn.functional as F

import json
from multiprocessing.sharedctypes import Value
from pathlib import Path

import pandas as pd
from sklearn.metrics import average_precision_score
import typer

from sklearn.metrics.pairwise import cosine_similarity
import timm
import pickle

In [2]:
PREDICTION_LIMIT = 20
QUERY_ID_COL = "query_id"
DATABASE_ID_COL = "database_image_id"
SCORE_COL = "score"


class MeanAveragePrecision:
    @classmethod
    def score(cls, predicted: pd.DataFrame, actual: pd.DataFrame, prediction_limit: int):
        """Calculates mean average precision for a ranking task.
        :param predicted: The predicted values as a dataframe with specified column names
        :param actual: The ground truth values as a dataframe with specified column names
        """
        if not predicted[SCORE_COL].between(0.0, 1.0).all():
            raise ValueError("Scores must be in range [0, 1].")
        if predicted.index.name != QUERY_ID_COL:
            raise ValueError(
                f"First column of submission must be named '{QUERY_ID_COL}', "
                f"got {predicted.index.name}."
            )
        if predicted.columns.to_list() != [DATABASE_ID_COL, SCORE_COL]:
            raise ValueError(
                f"Columns of submission must be named '{[DATABASE_ID_COL, SCORE_COL]}', "
                f"got {predicted.columns.to_list()}."
            )

        unadjusted_aps, predicted_n_pos, actual_n_pos = cls._score_per_query(
            predicted, actual, prediction_limit
        )
        adjusted_aps = unadjusted_aps.multiply(predicted_n_pos).divide(actual_n_pos)
        return adjusted_aps.mean()

    @classmethod
    def _score_per_query(
        cls, predicted: pd.DataFrame, actual: pd.DataFrame, prediction_limit: int
    ):
        """Calculates per-query mean average precision for a ranking task."""
        merged = predicted.merge(
            right=actual.assign(actual=1.0),
            how="left",
            on=[QUERY_ID_COL, DATABASE_ID_COL],
        ).fillna({"actual": 0.0})
        # Per-query raw average precisions based on predictions
        unadjusted_aps = merged.groupby(QUERY_ID_COL).apply(
            lambda df: average_precision_score(df["actual"].values, df[SCORE_COL].values)
            if df["actual"].sum()
            else 0.0
        )
        # Total ground truth positive counts for rescaling
        predicted_n_pos = merged["actual"].groupby(QUERY_ID_COL).sum().astype("int64").rename()
        actual_n_pos = actual.groupby(QUERY_ID_COL).size().clip(upper=prediction_limit)
        return unadjusted_aps, predicted_n_pos, actual_n_pos

In [3]:

import torch
import numpy as np

import torch
import numpy as np

class BelugaDataset(torch.utils.data.Dataset):

    def __init__(self, df, transforms=None, is_train=True):
        self.data = df
        self.is_train = is_train
        
        self.transforms = transforms
        self.normalization = 'imagenet'
        
        
    def normalize_img(self,img):
        
        if self.normalization == 'channel':
            #print(img.shape)
            pixel_mean = img.mean((0,1))
            pixel_std = img.std((0,1)) + 1e-4
            img = (img - pixel_mean[None,None,:]) / pixel_std[None,None,:]
            img = img.clip(-20,20)
           
        elif self.normalization == 'image':
            img = (img - img.mean()) / (img.std() + 1e-4)
            img = img.clip(-20,20)
            
        elif self.normalization == 'simple':
            img = img/255
            
        elif self.normalization == 'inception':
            mean = np.array([0.5, 0.5 , 0.5], dtype=np.float32)
            std = np.array([0.5, 0.5 , 0.5], dtype=np.float32)
            img = img.astype(np.float32)
            img = img/255.
            img -= mean
            img *= np.reciprocal(std, dtype=np.float32)
            
        elif self.normalization == 'imagenet':
            mean = np.array([123.675, 116.28 , 103.53 ], dtype=np.float32)
            std = np.array([58.395   , 57.120, 57.375   ], dtype=np.float32)
            img = img.astype(np.float32)
            img -= mean
            img *= np.reciprocal(std, dtype=np.float32)
            
        elif self.normalization == 'min_max':
            img = img - np.min(img)
            img = img / np.max(img)
            return img
        
        else:
            pass
        
        return img
    
    def __len__(self):
        
        return len(self.data)
    
    def custom_aug(self,img):
        sz1,sz2,_ = img.shape
        if np.random.uniform() < 0.5:
            p_blackout = np.random.uniform()
            

            if p_blackout < 0.5: #upper body
                img[0:int(sz1/2),:,:] =0
            else:
                img[int(sz1/2):,:,:] = 0
        else:
            
            p_blackout = np.random.uniform()
            if p_blackout < 0.5: #upper body
                img[:,0:int(sz2/2),:] =0
            else:
                img[:,int(sz2/2):,:] =0
        return img
    
    def get_pos_image(self,image_id,whale_id):
        sub_df = self.data[(self.data.image_id != image_id)&(self.data.whale_id==whale_id)]
        if len(sub_df) >= 1:
            pos_image_id = sub_df.sample(n=1).image_id.values[0]
        else:
            pos_image_id = image_id
            
        pos_image_path = f'../../input/images/{pos_image_id}.jpg'
        img = cv2.imread(pos_image_path,cv2.COLOR_BGR2RGB)
        
        if img.shape[1] > img.shape[0]:
            img = np.transpose(img, [1, 0, 2])
        
        return img,pos_image_id
            
    def get_neg_image(self,image_id,whale_id):
        

        sub_df = self.data[(self.data.image_id != image_id)&(self.data.whale_id!=whale_id)]
        neg_image_id = sub_df.sample(n=1,random_state=42).image_id.values[0]
            
        
        neg_image_path = f'../../input/images/{neg_image_id}.jpg'
        img = cv2.imread(neg_image_path,cv2.COLOR_BGR2RGB)
        
        if img.shape[1] > img.shape[0]:
            img = np.transpose(img, [1, 0, 2])
        
        return img,neg_image_id
    
    def __getitem__(self, idx):

        row = self.data.iloc[idx]
        image_id = row.image_id
        whale_id = row.whale_id
        
        img_path = '../../input/' + row.path
        img = cv2.imread(img_path,cv2.COLOR_BGR2RGB)
        if row.viewpoint != 'top':
            img = np.transpose(img, [1, 0, 2])
            
        if np.random.uniform() < 0.1:
            img = self.custom_aug(img)
        sample_d = self.transforms(image=img)
        img=sample_d["image"]  

        img = self.normalize_img(img)
        img = np.transpose(img, [2, 0, 1])
            

        sample = {"image_id": image_id, "image": img, "target":whale_id}
        return sample


In [4]:
def weights_init_kaiming(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        nn.init.kaiming_normal_(m.weight, a=0, mode='fan_out')
        nn.init.constant_(m.bias, 0.0)
    elif classname.find('Conv') != -1:
        nn.init.kaiming_normal_(m.weight, a=0, mode='fan_in')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)
    elif classname.find('BatchNorm') != -1:
        if m.affine:
            nn.init.constant_(m.weight, 1.0)
            nn.init.constant_(m.bias, 0.0)

In [5]:
class CrossEntropyLabelSmooth(nn.Module):
    """Cross entropy loss with label smoothing regularizer.
    Reference:
    Szegedy et al. Rethinking the Inception Architecture for Computer Vision. CVPR 2016.
    Equation: y = (1 - epsilon) * y + epsilon / K.
    Args:
        num_classes (int): number of classes.
        epsilon (float): weight.
    """
    def __init__(self, num_classes, epsilon=0.07, use_gpu=True, reduction=True):
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.use_gpu = use_gpu
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.reduction = reduction

    def forward(self, inputs, targets):
        """
        Args:
            inputs: prediction matrix (before softmax) with shape (batch_size, num_classes)
            targets: ground truth labels with shape (num_classes)
        """
        log_probs = self.logsoftmax(inputs)
        targets = torch.zeros(log_probs.size()).scatter_(1, targets.type(torch.int64).unsqueeze(1).data.cpu(), 1)
        if self.use_gpu: targets = targets.cuda()
        targets = (1 - self.epsilon) * targets + self.epsilon / self.num_classes
        if self.reduction:
            loss = (- targets * log_probs).mean(0).sum()
        else:
            loss = (- targets * log_probs)
            
        return loss

In [6]:
from __future__ import absolute_import

import torch
from torch import nn


class CenterLoss(nn.Module):
    """Center loss.
    Reference:
    Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
    Args:
        num_classes (int): number of classes.
        feat_dim (int): feature dimension.
    """

    def __init__(self, num_classes=788, feat_dim=2048, use_gpu=True):
        super(CenterLoss, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.use_gpu = use_gpu

        if self.use_gpu:
            self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).cuda())
        else:
            self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim))

    def forward(self, x, labels):
        """
        Args:
            x: feature matrix with shape (batch_size, feat_dim).
            labels: ground truth labels with shape (num_classes).
        """
        assert x.size(0) == labels.size(0), "features.size(0) is not equal to labels.size(0)"

        batch_size = x.size(0)
        distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
                  torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
        distmat.addmm_(1, -2, x, self.centers.t())

        classes = torch.arange(self.num_classes).long()
        if self.use_gpu: classes = classes.cuda()
        labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
        mask = labels.eq(classes.expand(batch_size, self.num_classes))

        dist = []
        for i in range(batch_size):
            value = distmat[i][mask[i]]
            value = value.clamp(min=1e-12, max=1e+12)  # for numerical stability
            dist.append(value)
        dist = torch.cat(dist)
        loss = dist.mean()
        return loss

In [7]:
import torch.nn as nn
class Swish(torch.autograd.Function):

    @staticmethod
    def forward(ctx, i):
        result = i * torch.sigmoid(i)
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        i = ctx.saved_variables[0]
        sigmoid_i = torch.sigmoid(i)
        return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))


class Swish_module(nn.Module):
    def forward(self, x):
        return Swish.apply(x)



class DenseCrossEntropy(nn.Module):
    def forward(self, x, target):
        x = x.float()
        target = target.float()
        logprobs = torch.nn.functional.log_softmax(x, dim=-1)

        loss = -logprobs * target
        loss = loss.sum(-1)
        return loss.mean()


class ArcMarginProduct_subcenter(nn.Module):
    def __init__(self, in_features, out_features, k=3):
        super().__init__()
        self.weight = nn.Parameter(torch.FloatTensor(out_features*k, in_features))
        self.reset_parameters()
        self.k = k
        self.out_features = out_features
        
    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        
    def forward(self, features):
        cosine_all = F.linear(F.normalize(features), F.normalize(self.weight))
        cosine_all = cosine_all.view(-1, self.out_features, self.k)
        cosine, _ = torch.max(cosine_all, dim=2)
        return cosine   


class ArcFaceLossAdaptiveMargin(nn.modules.Module):
    def __init__(self, margins, n_classes, s=30.0):
        super().__init__()
        self.crit = DenseCrossEntropy()
        self.s = s
        self.margins = margins
        self.out_dim =n_classes
            
    def forward(self, logits, labels):
        ms = []
        ms = self.margins[labels.cpu().numpy()]
        cos_m = torch.from_numpy(np.cos(ms)).float().cuda()
        sin_m = torch.from_numpy(np.sin(ms)).float().cuda()
        th = torch.from_numpy(np.cos(math.pi - ms)).float().cuda()
        mm = torch.from_numpy(np.sin(math.pi - ms) * ms).float().cuda()
        labels = F.one_hot(labels, self.out_dim).float()
        logits = logits.float()
        cosine = logits
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * cos_m.view(-1,1) - sine * sin_m.view(-1,1)
        phi = torch.where(cosine > th.view(-1,1), phi, cosine - mm.view(-1,1))
        output = (labels * phi) + ((1.0 - labels) * cosine)
        output *= self.s
        loss = self.crit(output, labels)
        return loss     



class ArcMarginProduct(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.Tensor(out_features, in_features))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weight)
        # stdv = 1. / math.sqrt(self.weight.size(1))
        # self.weight.data.uniform_(-stdv, stdv)

    def forward(self, features):
        cosine = F.linear(F.normalize(features), F.normalize(self.weight))
        return cosine


class ArcFaceLoss(nn.modules.Module):
    def __init__(self, s=45.0, m=0.1, crit="bce", weight=None, reduction="mean",label_smoothing=None,class_weights_norm=None ):
        super().__init__()

        self.weight = weight
        self.reduction = reduction
        self.class_weights_norm = class_weights_norm
        if label_smoothing is None:
            self.crit = nn.CrossEntropyLoss(reduction="none")   
        else:
            self.crit = nn.CrossEntropyLoss(reduction="none",label_smoothing=label_smoothing)   
        
        if s is None:
            self.s = torch.nn.Parameter(torch.tensor([45.], requires_grad=True, device='cuda'))
        else:
            self.s = s

        
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m
        
    def forward(self, logits, labels):

        logits = logits.float()
        cosine = logits
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        labels2 = torch.zeros_like(cosine)
        labels2.scatter_(1, labels.view(-1, 1).long(), 1)
        output = (labels2 * phi) + ((1.0 - labels2) * cosine)

        s = self.s

        output = output * s
        loss = self.crit(output, labels)

        if self.weight is not None:
            w = self.weight[labels].to(logits.device)

            loss = loss * w
            if self.class_weights_norm == "batch":
                loss = loss.sum() / w.sum()
            if self.class_weights_norm == "global":
                loss = loss.mean()
            else:
                loss = loss.mean()
            
            return loss

        if self.reduction == "mean":
            loss = loss.mean()
        elif self.reduction == "sum":
            loss = loss.sum()
        return loss    

def gem(x, p=3, eps=1e-6):
    return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)

class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6, p_trainable=False):
        super(GeM,self).__init__()
        if p_trainable:
            self.p = Parameter(torch.ones(1)*p)
        else:
            self.p = p
        self.eps = eps

    def forward(self, x):
        ret = gem(x, p=self.p, eps=self.eps)   
        return ret
    def __repr__(self):
        return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + ', ' + 'eps=' + str(self.eps) + ')'

In [8]:


class Net(nn.Module):
    def __init__(self, cfg):
        super(Net, self).__init__()

        self.cfg = cfg
        self.n_classes = self.cfg.n_classes
        self.backbone = timm.create_model(cfg.backbone, 
                                          pretrained=cfg.pretrained, 
                                          num_classes=0, 
                                          global_pool="", 
                                          in_chans=self.cfg.in_channels,features_only = True)

        
        #if ("efficientnet" in cfg.backbone) & (self.cfg.stride is not None):
        #    self.backbone.conv_stem.stride = self.cfg.stride
        backbone_out = self.backbone.feature_info[-1]['num_chs']
        #backbone_out_1 = self.backbone.feature_info[-2]['num_chs']

        if cfg.pool == "gem":
            self.global_pool1 = GeM(p_trainable=cfg.gem_p_trainable)
            self.global_pool2 = GeM(p_trainable=cfg.gem_p_trainable)
        elif cfg.pool == "identity":
            self.global_pool1 = torch.nn.Identity()
        elif cfg.pool == "avg":
            self.global_pool1 = nn.AdaptiveAvgPool2d(1)
            self.global_pool2 = nn.AdaptiveAvgPool2d(1)

        self.embedding_size = backbone_out

        feature_dim_l_g = 688

        self.neck =nn.Sequential(nn.Linear(feature_dim_l_g, self.embedding_size, bias=True),nn.PReLU())
        self.bottleneck = nn.BatchNorm1d(self.embedding_size)
        self.bottleneck.bias.requires_grad_(False)  # no shift
        self.bottleneck.apply(weights_init_kaiming)
        
        
        if not self.cfg.headless:    
            self.head_in_units = self.embedding_size
            self.head = ArcMarginProduct_subcenter(self.embedding_size, self.n_classes)
        else:
            self.head = nn.Linear(self.embedding_size, self.n_classes, bias=False)
        if self.cfg.loss == 'adaptive_arcface':
            self.loss_fn = ArcFaceLossAdaptiveMargin(cfg.margins,self.n_classes,cfg.arcface_s)
        elif self.cfg.loss == 'arcface':
            self.loss_fn = ArcFaceLoss(cfg.arcface_s,cfg.arcface_m,label_smoothing=0.1)
        elif self.cfg.loss == 'cross_entropy':
            self.loss_fn = CrossEntropyLabelSmooth(num_classes=self.n_classes)
        else:
            pass
        
    def forward(self, batch):

        x = batch['input']
        
        dev = x.device

        x = self.backbone(x)
        
        x_l = self.global_pool1(x[-2])[:,:,0,0]
        x_g = self.global_pool2(x[-1])[:,:,0,0]

        x_g = torch.cat([x_g,x_l],axis=1) 
        x_g = self.neck(x_g)
        
        x_emb = self.bottleneck(x_g)

        

        logits = self.head(x_emb)
        
        if self.training:
            loss = self.loss_fn(logits, batch['target'].long())
            return {"logits":logits ,"loss":loss,"target": batch['target'],'embeddings':x_g}

        else:
            loss = self.loss_fn(logits, batch['target'].long())
            return {"logits":logits ,"loss":loss, "target": batch['target'],'embeddings': x_emb
                   }

    def freeze_weights(self, freeze=[]):
        for name, child in self.named_children():
            if name in freeze:
                for param in child.parameters():
                    param.requires_grad = False


    def unfreeze_weights(self, freeze=[]):
        for name, child in self.named_children():
            if name in freeze:
                for param in child.parameters():
                    param.requires_grad = True

In [9]:
import torch.cuda.amp as amp
class AmpNet(Net):
    
    def __init__(self,cfg):
        super(AmpNet, self).__init__(cfg)
    @torch.cuda.amp.autocast()
    def forward(self,*args):
        return super(AmpNet, self).forward(*args)

is_mixed_precision = True  #True #False

In [10]:
def getModel(cfg):
    model = AmpNet(cfg)
    if GPU:
        model.cuda()
    return model

def getOptimzersScheduler(model,cfg,steps_in_epoch=25,pct_start=0.1):
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.learning_rate)
    
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,steps_per_epoch=1,
                                                    pct_start=pct_start,
                                                    max_lr=cfg.learning_rate,
                                                    epochs  = cfg.max_epochs, 
                                                    div_factor = cfg.div_factor, 
                                                    final_div_factor=cfg.final_div_factor,
                                                    verbose=True)
    
    return optimizer,scheduler,False
    

In [11]:
def save_model(epoch,model,ckpt_path='./',name='unet_effnet_b1',val_loss=0):
    path = os.path.join(ckpt_path, '{}_{}_{}.pth'.format(name, epoch,val_loss))
    torch.save(model.state_dict(), path, _use_new_zipfile_serialization=False)
    
def load_model(model,ckpt_path):
    state = torch.load(ckpt_path)
    new_state = {}
    for k,v in state.items():
        if not k.startswith('head'):
            new_state[k]=v
    model.load_state_dict(new_state,strict=False)
    return model

In [12]:
def getDataLoader(cfg,train_x,train_x_smp,val_x):
    
    train_dataset = BelugaDataset(
            df=train_x, transforms=cfg.train_aug,is_train=True
        )
    
    train_dataset_emb = BelugaDataset(
            df=train_x_smp, transforms=cfg.val_aug
        )
        
    val_dataset = BelugaDataset(df=val_x, transforms=cfg.val_aug)
    
    trainDataLoader = torch.utils.data.DataLoader(
                            train_dataset,
                            batch_size=cfg.batch_size,
                            num_workers=cfg.num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=True,
                            worker_init_fn=lambda id: np.random.seed(torch.initial_seed() // 2 ** 32 + id)
                        )
    
    valDataLoader = torch.utils.data.DataLoader(
                        val_dataset,
                        batch_size=cfg.batch_size,
                        num_workers=cfg.num_workers,
                        shuffle=False,
                        pin_memory=False,
                    )
    
    trainDataLoader_emb = torch.utils.data.DataLoader(
                            train_dataset_emb,
                            batch_size=cfg.batch_size,
                            num_workers=cfg.num_workers,
                            shuffle=False,
                            pin_memory=False,
                        )
    
    return trainDataLoader,trainDataLoader_emb,valDataLoader

In [13]:
def training_step(model, batch, batch_idx,optimizer,scheduler,isStepScheduler=False):
    # Load images and labels
    x = batch["image"].float().cuda()
    labels = batch["target"].float().cuda()

    
    optimizer.zero_grad()
    if is_mixed_precision:
        with amp.autocast():
            
            batch = {'input':x,'target':labels}
            
            out = model(batch)
            
            loss = out['loss']
            
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            #torch.nn.utils.clip_grad_norm_(net.parameters(), 2)
            scaler.step(optimizer)
            scaler.update()
            loss = loss.item()
        
    if isStepScheduler:
        scheduler.step()

    return loss

def validation_step(model, batch, batch_idx):
    # Load images and labels
    x = batch["image"].float().cuda()
    target = batch["target"].long().cuda()
    batch1 = {'input':x,'target':target}


    # Forward pass & softmax
    with torch.no_grad():
        if is_mixed_precision:
            with amp.autocast():
                out = model(batch1)
                loss = out['loss']
                loss = loss.item()
                
                embeddings = out['embeddings'].cpu().numpy()
        
    
    embeddings_dict={}
    image_ids = batch['image_id']
    for i in range(len(image_ids)):
        embeddings_dict[image_ids[i]] = embeddings[i]
        
    return embeddings_dict,loss

In [14]:
def train_epoch(model,trainDataLoader,optimizer,scheduler,isStepScheduler=True):
    total_loss=0
    model.train()
    torch.set_grad_enabled(True)
    total_step=0

    pbar = tqdm(enumerate(trainDataLoader),total=len(trainDataLoader))
    for bi,data in pbar:
        loss = training_step(model,data,bi,optimizer,scheduler)
        total_loss+=loss
        total_step+=1

        pbar.set_postfix({'loss':total_loss/total_step})
        
    if not isStepScheduler: #in case epoch based scheduler
        scheduler.step()
            
    total_loss /= total_step
    return total_loss
        

def val_epoch(model,valDataLoader):
    total_loss=0
    embeddings_dicts = {}
    total_step=0
    model.eval()
    pbar=tqdm(enumerate(valDataLoader),total=len(valDataLoader))
    for bi,data in pbar :
        embeddings_dict,loss = validation_step(model,data,bi)
        
        total_loss += loss
        total_step += 1
        pbar.set_postfix({'loss':total_loss/total_step})
        
        for k,v in embeddings_dict.items():
            embeddings_dicts[k]=v
        
    return embeddings_dicts,total_loss/total_step


        

In [15]:
def createValLabels(valq,valdb, sc_no):
    
    #print(valq.columns)
    #print(valdb.columns)
    
    labeldf = pd.DataFrame()
    for i,r in valq.iterrows():
        db = valdb
        db = db[db.image_id != r.image_id].copy()
        db['target'] = (db.whale_id == r.whale_id).astype(int)
        
        if db.target.sum() < 1:
            continue
         
        lbl = db[db.target==1].copy()
        
        lbl['query_id'] = f'scenario_{sc_no}_' + r.image_id
        
        labeldf = pd.concat([labeldf,lbl])
      
    labeldf= labeldf.rename(columns={'image_id':'database_image_id'})
    return labeldf.reset_index(drop=True)

def createValScenarios(val_x, train_x_smp):
    label_dfs = []
    query_dfs = []
    database_dfs = []
    
    
    
    val_q_master = val_x[['image_id','viewpoint','whale_id','year']].copy()
    val_d_master = pd.concat([val_x[['image_id','viewpoint','whale_id','year']],train_x_smp[['image_id','viewpoint','whale_id','year']]]).copy()
    
    sc_no=1 
    #top view
    
    val_q = val_q_master[val_q_master.viewpoint == 'top'].copy()
    val_d = val_d_master[val_d_master.viewpoint == 'top'].copy()
    labeldf = createValLabels(val_q,val_d,sc_no)

    val_q['query_id'] = f'scenario_{sc_no}_' + val_q.image_id
    val_d = val_d.rename(columns={'image_id':'database_image_id'})
    val_q = val_q.rename(columns={'image_id':'query_image_id'})
    
    label_dfs.append(labeldf)
    query_dfs.append(val_q)
    database_dfs.append(val_d)
    
    
    #top view 2017
    sc_no+=1
    
    val_q = val_q_master[(val_q_master.viewpoint == 'top') & (val_q_master.year == 2017)].copy()
    val_d = val_d_master[(val_d_master.viewpoint == 'top') & (val_d_master.year == 2017)].copy()
    labeldf = createValLabels(val_q,val_d,sc_no)

    val_q['query_id'] = f'scenario_{sc_no}_' + val_q.image_id
    val_d = val_d.rename(columns={'image_id':'database_image_id'})
    val_q = val_q.rename(columns={'image_id':'query_image_id'})
    
    label_dfs.append(labeldf)
    query_dfs.append(val_q)
    database_dfs.append(val_d)
    
    #top view 2018
    sc_no+=1
    
    val_q = val_q_master[(val_q_master.viewpoint == 'top') & (val_q_master.year == 2018)].copy()
    val_d = val_d_master[(val_d_master.viewpoint == 'top') & (val_d_master.year == 2018)].copy()
    labeldf = createValLabels(val_q,val_d,sc_no)

    val_q['query_id'] = f'scenario_{sc_no}_' + val_q.image_id
    val_d = val_d.rename(columns={'image_id':'database_image_id'})
    val_q = val_q.rename(columns={'image_id':'query_image_id'})
    
    label_dfs.append(labeldf)
    query_dfs.append(val_q)
    database_dfs.append(val_d)
    
    #top view 2019
    sc_no+=1
    
    val_q = val_q_master[(val_q_master.viewpoint == 'top') & (val_q_master.year == 2019)].copy()
    val_d = val_d_master[(val_d_master.viewpoint == 'top') & (val_d_master.year == 2019)].copy()
    labeldf = createValLabels(val_q,val_d,sc_no)

    val_q['query_id'] = f'scenario_{sc_no}_' + val_q.image_id
    val_d = val_d.rename(columns={'image_id':'database_image_id'})
    val_q = val_q.rename(columns={'image_id':'query_image_id'})
    
    label_dfs.append(labeldf)
    query_dfs.append(val_q)
    database_dfs.append(val_d)
    
    #top view q 2019 d 2017
    sc_no+=1
    
    val_q = val_q_master[(val_q_master.viewpoint == 'top') & (val_q_master.year == 2019)].copy()
    val_d = val_d_master[(val_d_master.viewpoint == 'top') & (val_d_master.year == 2017)].copy()
    labeldf = createValLabels(val_q,val_d,sc_no)

    val_q['query_id'] = f'scenario_{sc_no}_' + val_q.image_id
    val_d = val_d.rename(columns={'image_id':'database_image_id'})
    val_q = val_q.rename(columns={'image_id':'query_image_id'})
    
    label_dfs.append(labeldf)
    query_dfs.append(val_q)
    database_dfs.append(val_d)
    
    #top view q 2019 d 2018
    sc_no+=1
    
    val_q = val_q_master[(val_q_master.viewpoint == 'top') & (val_q_master.year == 2019)].copy()
    val_d = val_d_master[(val_d_master.viewpoint == 'top') & (val_d_master.year == 2018)].copy()
    labeldf = createValLabels(val_q,val_d,sc_no)

    val_q['query_id'] = f'scenario_{sc_no}_' + val_q.image_id
    val_d = val_d.rename(columns={'image_id':'database_image_id'})
    val_q = val_q.rename(columns={'image_id':'query_image_id'})
    
    
    label_dfs.append(labeldf)
    query_dfs.append(val_q)
    database_dfs.append(val_d)
    
    # q top view q d not top view
    sc_no+=1
    
    val_q = val_q_master[val_q_master.viewpoint == 'top'].copy()
    val_d = val_d_master[val_d_master.viewpoint != 'top'].copy()
    labeldf = createValLabels(val_q,val_d,sc_no)

    val_q['query_id'] = f'scenario_{sc_no}_' + val_q.image_id
    val_d = val_d.rename(columns={'image_id':'database_image_id'})
    val_q = val_q.rename(columns={'image_id':'query_image_id'})
    
    label_dfs.append(labeldf)
    query_dfs.append(val_q)
    database_dfs.append(val_d)
    
    # q top view q q not top view
    sc_no+=1
    
    val_q = val_q_master[val_q_master.viewpoint != 'top'].copy()
    val_d = val_d_master[val_d_master.viewpoint == 'top'].copy()
    labeldf = createValLabels(val_q,val_d,sc_no)

    val_q['query_id'] = f'scenario_{sc_no}_' + val_q.image_id
    val_d = val_d.rename(columns={'image_id':'database_image_id'})
    val_q = val_q.rename(columns={'image_id':'query_image_id'})
    
    label_dfs.append(labeldf)
    query_dfs.append(val_q)
    database_dfs.append(val_d)
    
    for i in range(len(label_dfs)):
        print(i,query_dfs[i].shape,database_dfs[i].shape,label_dfs[i].shape)

    return query_dfs, database_dfs, label_dfs

In [16]:
GPU=True

from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler
def val_score(img_embs,queries,databases,labels):
    
    scenario_pred_dfs= []

    for sc_no in range(len(labels)):
        qs = queries[sc_no]
        dbs = databases[sc_no]
        lbls = labels[sc_no]
        
        db_keys = []
        db_embeddings = []

        for d in dbs['database_image_id'].values:
            db_keys.append(d)
            db_embeddings.append(img_embs[d])
            
        db_embeddings = np.stack(db_embeddings)
        db_keys = np.array(db_keys)
        #print('Scenario',sc_no,db_embeddings.shape,qs.shape,dbs.shape,lbls.shape)
        
        pred_dfs = []
        
        try:
            for i,r in qs.iterrows():
                query_image_id = r.query_image_id
                q_emb = img_embs[query_image_id]
                df = pd.DataFrame()
                #edis = euclidean_distances(q_emb.reshape(1,-1), db_embeddings)[0]
                #edis = 1-MinMaxScaler().fit_transform(edis.reshape(-1,1)).reshape(-1)
                sims = cosine_similarity(q_emb.reshape(1,-1), db_embeddings)[0]
                qry_result = pd.DataFrame()
                qry_result['database_image_id'] = db_keys
                qry_result['score'] = (sims + 1)/2 #edis#
                qry_result['query_id'] = r.query_id
                qry_result = qry_result[qry_result.database_image_id != query_image_id]

                qry_result = qry_result.sort_values('score',ascending=False).head(20)
                
                #print('qry_result',qry_result.sort_values('score',ascending=False).head(50))
                pred_dfs.append(qry_result)


            pred_dfs = pd.concat(pred_dfs)
            pred_dfs['tmp'] = pred_dfs.query_id.apply(lambda x: x.split('-')[-1])
            pred_dfs = pred_dfs[pred_dfs.database_image_id != pred_dfs.tmp]
            pred_dfs = pred_dfs.reset_index(drop=True)
            del pred_dfs['tmp']
            pred_dfs = pred_dfs.set_index('query_id')
            lbls = lbls.copy().set_index('query_id')

            map_score = MeanAveragePrecision()
        
            score = map_score.score(pred_dfs,lbls,PREDICTION_LIMIT)
            print('Scenario',sc_no,score)
            scenario_pred_dfs.append(pred_dfs.reset_index(drop=False))
        except ValueError as e:
            print('ValueError',e)
            return 0
        

    scenario_pred_dfs = pd.concat(scenario_pred_dfs)
    labels = pd.concat(labels)
    
    scenario_pred_dfs['tmp'] = scenario_pred_dfs.query_id.apply(lambda x: x.split('-')[-1])
    
    scenario_pred_dfs = scenario_pred_dfs[scenario_pred_dfs.database_image_id != scenario_pred_dfs.tmp]
    scenario_pred_dfs = scenario_pred_dfs.reset_index(drop=True)
    print('VAL:',scenario_pred_dfs.shape,labels.shape)
    #print(scenario_pred_dfs)
    del scenario_pred_dfs['tmp']
    
    scenario_pred_dfs['score'] = scenario_pred_dfs['score']
    #scenario_pred_dfs['score'] = np.clip(scenario_pred_dfs['score'].values,1e-4,0.9999)
    
    scenario_pred_dfs = scenario_pred_dfs.set_index('query_id')
    labels = labels.set_index('query_id')
    
    map_score = MeanAveragePrecision()
    score = map_score.score(scenario_pred_dfs,labels,PREDICTION_LIMIT)
    return score
    
def training_loop(cfg,train_x,trainX_smp,val_x,savedir='./',mdl_name='resnet34'):
    
    #create model
    max_epochs = cfg
    model = getModel(cfg)
    #load model
    if resume:
        print(f'Loading model {resume_ckpt}')
        model = load_model(model,resume_ckpt)
    #get loaders
    
    
    queries,databases,labels = createValScenarios(val_x,trainX_smp)

    trainDataLoader,trainDataLoader_emb,valDataLoader = getDataLoader(cfg,train_x,trainX_smp,val_x)
    
    optimizer,scheduler,isStepScheduler = getOptimzersScheduler(model,cfg,
                                                                steps_in_epoch=len(trainDataLoader),
                                                                pct_start=0.1)
    best_score = 0
    best_loss = 999
    #control loop
    for e in range(cfg.max_epochs):

        train_loss = train_epoch(model,trainDataLoader,optimizer,scheduler,isStepScheduler)
        img_embs,vloss = val_epoch(model,valDataLoader)
        train_img_embs,_ = val_epoch(model,trainDataLoader_emb)
        
        for k,v in train_img_embs.items():
            img_embs[k]=v

        score = val_score(img_embs,queries,databases,labels)
        #logging here
        print(e,'Train Result',f'loss={train_loss} ')
        print(e,'Val Result',f'comp metric={score} {vloss}')
        if score > best_score : #score > best_score:
            print(f'Saving for best_score {score}')
            save_model(e,model,ckpt_path=savedir,name=mdl_name)
            best_score=score
        else:
            print(f'Not Saving for score {score} ')
    return best_score

In [17]:
cfg = SimpleNamespace(**{})
cfg.n_classes = 788
cfg.backbone = "tf_efficientnet_b5_ns"
cfg.pretrained = True
cfg.embedding_size = 512
cfg.pool = "gem"
cfg.gem_p_trainable = True
cfg.in_channels = 3
cfg.loss = 'arcface'
cfg.arcface_s = 45
cfg.arcface_m = 0.3
cfg.arcface_m_x =  0.45
cfg.arcface_m_y = 0.05
cfg.headless = False

cfg.learning_rate = 1e-3
cfg.max_epochs = 41
cfg.div_factor = 100
cfg.final_div_factor = 200
cfg.batch_size=48
cfg.num_workers=0
cfg.img_size = (512,256)
cfg.img_size1 = (512+64,256+32)
# AUGS

image_size0 = cfg.img_size[0]
image_size1 = cfg.img_size[1]

cfg.train_aug = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=10, border_mode=0, p=0.5),
        A.GaussianBlur(blur_limit=(3, 7), p=0.2),
        A.Posterize(p=0.1),
        A.OneOf([
            A.Sharpen(p=0.4),
            A.ToGray(p=0.4),
            A.CLAHE(p=0.2),
        ], p=0.35),
        A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, p=0.5),
        #A.RandomSnow(p=0.1),
        #A.RandomRain(p=0.05),
        A.Rotate(limit=(180,180),p=0.25),
        A.Resize(cfg.img_size1[0] , cfg.img_size1[1] , p=1),
        A.RandomCrop(image_size0, image_size1, p=1),
        A.Cutout(max_h_size=int(image_size0 * 0.1), max_w_size=int(image_size1 * 0.1), num_holes=3, p=0.35),
    ])


cfg.val_aug = A.Compose([
        A.Resize(image_size0, image_size1),
    ])

cfg.n_splits=5




In [18]:
scaler = amp.GradScaler()

In [19]:
import random
seed=42
def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms = True
    random.seed(0)
    np.random.seed(0)
set_seed(seed)



In [20]:
train_metadata = pd.read_csv('../../input/metadata.csv')
random.seed(9)  # set a seed for reproducibility

In [21]:
from sklearn.preprocessing import LabelEncoder
lbl= LabelEncoder()

In [22]:
#train_metadata = train_metadata[train_metadata.viewpoint=='top'].reset_index(drop=True)
train_metadata = pd.read_csv('../../input/metadata.csv')
train_metadata['whale_id'] = train_metadata.whale_id.apply(lambda x: int(x.replace('whale','')))
train_metadata['encounter_id_int'] = lbl.fit_transform(train_metadata['encounter_id'].values.reshape(-1,1)) #train_metadata['whale_id'] 
train_metadata['timestamp'] = pd.to_datetime(train_metadata['timestamp'])
train_metadata['year'] = train_metadata['timestamp'].dt.year

train_metadata['timestamp'] = pd.to_datetime(train_metadata['timestamp'])
train_metadata['year'] = train_metadata['timestamp'].dt.year
train_metadata['month'] = train_metadata['timestamp'].dt.month
train_metadata['orig_index'] = train_metadata.index
train_metadata = train_metadata.sort_values(['whale_id','timestamp'])
train_metadata.shape


  y = column_or_1d(y, warn=True)


(5902, 13)

In [23]:
train_metadata['time_delta'] = train_metadata.groupby('whale_id').timestamp.apply(lambda x: (x-x.shift(1)))
train_metadata['time_delta'] = train_metadata['time_delta'].apply(lambda x: x.seconds/60).fillna(36000)

train_metadata.shape

(5902, 14)

In [24]:
TIME_DELTA=10
def segment_enc(ser):
    seg_id = 0
    seg_ids = []
    for ix in ser.index:
        if ser.loc[ix] > TIME_DELTA:
            seg_id+=1
            seg_ids.append(seg_id)
            
        else:
            seg_ids.append(seg_id)
            
    return np.array(seg_ids)
train_metadata['enc_seg_id'] = train_metadata.groupby('whale_id').time_delta.transform(segment_enc)            

train_metadata['enc_seg_id'] = train_metadata.whale_id.astype(str) + '_' + train_metadata['enc_seg_id'].astype(str)

In [25]:
train_metadata['month_wid'] = train_metadata.whale_id.astype(str) + train_metadata.year.astype(str)  + train_metadata.month.astype(str)

In [26]:
train_metadata['cnt'] = train_metadata.groupby('whale_id').whale_id.transform('count')

In [27]:
train_metadata=train_metadata.sort_values('orig_index').reset_index(drop=True)

In [28]:
tmp = np.sqrt(1 / np.sqrt(train_metadata['whale_id'].value_counts().sort_index().values))
cfg.margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * cfg.arcface_m_x + cfg.arcface_m_y
cfg.margins.shape

(788,)

In [29]:
from sklearn.model_selection import GroupKFold, KFold, StratifiedKFold
group_kfold =GroupKFold(n_splits=cfg.n_splits)
train = train_metadata.copy()


fn=0
for train_index, test_index in group_kfold.split(train.image_id, train.whale_id, train.month_wid):
    train.loc[test_index,'fold'] = fn
    fn+=1

In [30]:
resume=False

In [31]:
import gc
version='reid_trick_effnet_v2'
fn=0

cfg.learning_rate = 1e-3

for fn in [0,1,2,3,4]:  
    set_seed()
    mdl_name=cfg.backbone
    savedir = f'trained-models-v{version}'
    Path(savedir).mkdir(exist_ok=True, parents=True)
    
    valX = train[train.fold==fn].copy().reset_index(drop=True)
    trainX = train[train.fold!=fn].copy().reset_index(drop=True)
    
    
    
    v_w_s= set(valX.whale_id.unique())
    t_w_s= set(trainX.whale_id.unique())
    
    print('v_w_s',len(v_w_s.intersection(t_w_s)))
    print('t_w_s',len(t_w_s.difference(v_w_s)))
    
    print(len(t_w_s), len(v_w_s))
    
    #print(valX[valX.whale_id==6]  )
    #print(trainX[trainX.whale_id==6])
    trainX_smp = trainX[trainX.time_delta>0.5].copy().reset_index(drop=True)
    valX = valX[valX.time_delta>1].copy().reset_index(drop=True)
    
    
    v_w_s= set(valX.whale_id.unique())
    t_w_s= set(trainX.whale_id.unique())
    
    print('v_w_s',len(v_w_s.intersection(t_w_s)))
    print('t_w_s',len(t_w_s.difference(v_w_s)))
    
    print('set operation',len(t_w_s), len(v_w_s))
    
    print('Train',trainX.shape,'trainX_smp',trainX_smp.shape,'Val',valX.shape)
    
    training_loop(cfg,trainX,trainX_smp,valX,savedir=savedir,mdl_name=f'{mdl_name}-fold{fn}')
    gc.collect()

    #break

v_w_s 50
t_w_s 601
651 187
v_w_s 50
t_w_s 601
set operation 651 187
Train (4721, 18) trainX_smp (1908, 18) Val (434, 18)
0 (376, 5) (2044, 4) (1809, 6)
1 (137, 5) (689, 4) (280, 6)
2 (110, 5) (671, 4) (266, 6)
3 (129, 5) (684, 4) (326, 6)
4 (129, 5) (689, 4) (142, 6)
5 (129, 5) (671, 4) (247, 6)
6 (376, 5) (298, 4) (171, 6)
7 (58, 5) (2044, 4) (183, 6)
Adjusting learning rate of group 0 to 1.0000e-05.


  0%|          | 0/98 [00:00<?, ?it/s]

TypeError: image must be numpy array type