In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [251]:
import torch
import torchaudio as ta
import torchaudio.functional as taf
import torchaudio.transforms as tat
from torchvision import transforms
from torch.nn.parallel import DistributedDataParallel, DataParallel


print(torch.__version__)        
print(ta.__version__)

import matplotlib
import matplotlib.pyplot as plt
from IPython.display import Audio, display

import pandas as pd
import os
import pprint

from typing import *
import itertools
from collections import Counter
import timm

import numpy as np
from datetime import datetime
from lark.config import Config
from lark.learner import Learner
from lark.ops import Sig2Spec, MixedSig2Spec

from tqdm import tqdm
from lark.ops import f1



1.8.1
0.8.0a0+e4e171a


In [3]:
rank = 0

In [4]:
cfg = Config(
        sites=['COR', 'SSW'],
        use_neptune=True,
        n_epochs=10,
        bs=64,
        n_samples_per_label=2000,
        lr=1e-3,
        model='tf_efficientnet_b0_ns',
        scheduler='torch.optim.lr_scheduler.CosineAnnealingLR',
        loss_fn='lark.ops.SigmoidFocalLossStar',
        use_pink_noise=0.1,
        use_recorded_noise=0.2,
        use_overlays=True,
        apply_filter=0.1,
        seed=231,
        n_workers=6,
    )

In [27]:

prep = MixedSig2Spec(cfg, rank)
main_model = timm.create_model('tf_efficientnet_b0_ns', pretrained=True)
main_model.classifier = torch.nn.Linear(in_features=1280, out_features=len(cfg.labels), bias=True)
model = torch.nn.Sequential(prep, main_model)
ddp_model = DataParallel(model, device_ids=[rank])

In [29]:
lrn = Learner("tf_efficientnet_b0_ns", cfg, rank, ddp_model)

tf_efficientnet_b0_ns-20210604-054722:0


In [30]:
lrn.model.state_dict().keys()

odict_keys(['module.1.conv_stem.weight', 'module.1.bn1.weight', 'module.1.bn1.bias', 'module.1.bn1.running_mean', 'module.1.bn1.running_var', 'module.1.bn1.num_batches_tracked', 'module.1.blocks.0.0.conv_dw.weight', 'module.1.blocks.0.0.bn1.weight', 'module.1.blocks.0.0.bn1.bias', 'module.1.blocks.0.0.bn1.running_mean', 'module.1.blocks.0.0.bn1.running_var', 'module.1.blocks.0.0.bn1.num_batches_tracked', 'module.1.blocks.0.0.se.conv_reduce.weight', 'module.1.blocks.0.0.se.conv_reduce.bias', 'module.1.blocks.0.0.se.conv_expand.weight', 'module.1.blocks.0.0.se.conv_expand.bias', 'module.1.blocks.0.0.conv_pw.weight', 'module.1.blocks.0.0.bn2.weight', 'module.1.blocks.0.0.bn2.bias', 'module.1.blocks.0.0.bn2.running_mean', 'module.1.blocks.0.0.bn2.running_var', 'module.1.blocks.0.0.bn2.num_batches_tracked', 'module.1.blocks.1.0.conv_pw.weight', 'module.1.blocks.1.0.bn1.weight', 'module.1.blocks.1.0.bn1.bias', 'module.1.blocks.1.0.bn1.running_mean', 'module.1.blocks.1.0.bn1.running_var', 'mo

In [38]:
kind = "best"
cp_name = f"tf_efficientnet_b0_ns-full-20210602-052907-{kind}"

In [39]:
lrn.load_checkpoint("", cp_name)

{'epoch': 3,
 'valid_loss': 0.008241296271825665,
 'valid_score': 0.3062037527561188}

In [40]:
# checkpoint = torch.load(f"checkpoints/{cp_name}.pt")

In [41]:
# checkpoint['model_state_dict'].keys()

In [71]:
lrn.evaluate()

100%|██████████| 38/38 [00:03<00:00,  9.67it/s]


Unnamed: 0,thresh,tp,tn,fp,fn,f1
0,0.0,1183,0,114017,0,0.020329
1,0.1,778,106370,7647,405,0.161948
2,0.2,546,112028,1989,637,0.293706
3,0.3,411,113063,954,772,0.322606
4,0.4,327,113495,522,856,0.32185
5,0.5,269,113712,305,914,0.306204
6,0.6,215,113833,184,968,0.271808
7,0.7,158,113914,103,1025,0.218837
8,0.8,114,113952,65,1069,0.167401
9,0.9,66,113990,27,1117,0.103448


# CPMP postprocessing
https://www.kaggle.com/c/birdclef-2021/discussion/243378

In [924]:
occur = np.zeros((cfg.n_labels, cfg.n_labels), dtype='int')

In [925]:
occur.shape

(48, 48)

In [926]:
df_meta = lrn.tdl.dataset.df_meta
labels = lrn.tdl.dataset.labels
indices = lrn.tdl.dataset.indices

In [927]:
# df = df_meta[df_meta.primary_label != 'nocall']
df = df_meta[df_meta.primary_label.isin(labels)]

In [928]:
for primary, secondary in zip(df.primary_label, df.secondary_labels):
    for label in secondary:
        # if label in labels and label != primary:
        if label in labels:
            occur[indices[primary], indices[label]] = 1
            occur[indices[label], indices[primary]] = 1

In [929]:
occur

array([[1, 1, 1, ..., 1, 0, 0],
       [1, 0, 1, ..., 1, 1, 0],
       [1, 1, 1, ..., 1, 1, 0],
       ...,
       [1, 1, 1, ..., 1, 1, 0],
       [0, 1, 1, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [857]:
df_ss = pd.read_csv(f"{cfg.data_dir}/train_soundscape_labels.csv")
if cfg.sites is not None:
    df_ss = df_ss[df_ss['site'].isin(cfg.sites)].reset_index(drop=True)

In [70]:
df_ss

Unnamed: 0,row_id,site,audio_id,seconds,birds
0,7019_COR_5,COR,7019,5,nocall
1,7019_COR_10,COR,7019,10,nocall
2,7019_COR_15,COR,7019,15,nocall
3,7019_COR_20,COR,7019,20,nocall
4,7019_COR_25,COR,7019,25,nocall
...,...,...,...,...,...
2395,54955_SSW_580,SSW,54955,580,nocall
2396,54955_SSW_585,SSW,54955,585,grycat
2397,54955_SSW_590,SSW,54955,590,grycat
2398,54955_SSW_595,SSW,54955,595,nocall


In [176]:
df_ss[['site', 'audio_id']].drop_duplicates()

Unnamed: 0,site,audio_id
0,COR,7019
120,COR,7954
240,COR,11254
360,COR,18003
480,COR,21767
600,COR,26746
720,COR,31928
840,COR,44957
960,COR,50878
1080,COR,57610


In [243]:
ps, ys = lrn.validation_inference()
ps = ps.cpu()
ys = ys.cpu()

100%|██████████| 38/38 [00:03<00:00,  9.57it/s]


In [228]:
ps[0:120].shape

torch.Size([120, 48])

In [307]:
logits = ps

def process_preds(chunk, thr=0.5, incr=-0.1, occur_thr=0.3):
    logits = chunk.numpy().copy()
    logits_max = logits.max(0)
    if occur_thr:
        present = ((logits_max > thr) * 1).reshape((1, -1))
        secondary = np.matmul(present, occur)
        for j in range(logits.shape[1]):
            if secondary[:, j] > 0:
                logits[:, j] += occur_thr
        logits_max = logits.max(0)
    for j in range(logits.shape[1]):
        if logits_max[j] > thr:
            logits[:, j] += thr - incr
    return logits

In [308]:
def eval_chunk(ps, c, thr=0.5):
    fr = c * 120
    to = (c + 1) * 120
    f1s = f1(ys[fr:to], ps[fr:to], thr)['f1']
    pps = process_preds(ps[fr:to], thr=thr)
    pps_f1 = f1(ys[fr:to], pps, thr)['f1']
    # pps = logits[fr:to]
    return f1s, pps_f1
    
    # ts = np.arange(0.0, 2.1, 0.1)
    # rs = [f1(ys[fr:to], pps, t) for t in ts]
    # return pd.DataFrame(rs)

In [309]:
eval_chunk(ps, 7, 0.5)

(0.633093535900116, 0.08947368421052632)

# Triplex postprocessing

https://www.kaggle.com/c/birdclef-2021/discussion/243473

https://github.com/xins-yao/Kaggle_Birdcall_17th_solution/blob/master/inference.py

if model gets a confident prediction of any bird, then lower threshold for this bird in the same audio file

use thr_median as initial threshold

use thr_high for confident prediction

if any bird with probability higher than thr_high in any clip, lower threshold to thr_low for this specific bird in the same audio file

In [330]:
ps, ys = lrn.validation_inference()
ps = ps.cpu().numpy()
ys = ys.cpu().numpy()

100%|██████████| 38/38 [00:03<00:00,  9.62it/s]


In [331]:
def get_chunk(c, ps, ys):
    fr = c * 120
    to = (c + 1) * 120
    return ps[fr:to], ys[fr:to]

In [890]:
# def get_chunk_thresholds(chunk_nr, ps, thr_dict, chunk_size=120):
#     fr = chunk_nr * chunk_size
#     to = (chunk_nr + 1) * chunk_size
#     psc = ps[fr:to]
#     thresholds = np.ones_like(psc) * thr_dict['median']
#     is_confident = np.sum(psc > thr_dict['high'], axis=0).astype(bool)
#     thresholds[:, is_confident] = thr_dict['low']
#     return thresholds

def get_chunk_thresholds(chunk_nr, ps, thr_dict, occur, chunk_size=120):
    fr = chunk_nr * chunk_size
    to = (chunk_nr + 1) * chunk_size
    psc = ps[fr:to]
    thresholds = np.ones_like(psc) * thr_dict['median']
    is_confident = np.sum(psc > thr_dict['high'], axis=0).astype(bool)
    thresholds[:, is_confident] = thr_dict['low']
    
    thresholds[:, np.where(occur[is_confident])[0]] = thr_dict['corr']   
    
    return thresholds

def get_thresholds(ps, thr_dict, chunk_size=120):
    n_chunks = ps.shape[0] // chunk_size
    return np.concatenate([get_chunk_thresholds(i, ps, thr_dict, chunk_size) for i in range(n_chunks)])

def compute_f1(ps, ys, ts, combined: bool = True):
    f1s = metrics.f1_score(ys, ps >= ts, average='micro', zero_division=1)
    if combined:
        no_call_f1s = metrics.f1_score(np.abs(ys - 1), ps < ts, average='micro', zero_division=1)
        f1s = no_call_f1s * 0.54 + f1s * 0.46
    return f1s

def get_global_f1(ps, ys, td, chunk_size=120, combined=True):
    ts = get_thresholds(ps, td, chunk_size)
    return compute_f1(ps, ys, ts, combined)

def get_individual_f1(ps, ys, ts, chunk_size=120):
    n_chunks = ps.shape[0] // chunk_size
    for i in range(n_chunks):
        fr = i * chunk_size
        to = (i + 1) * chunk_size
        ts = get_chunk_thresholds(i, ps, ts, chunk_size)
        f1s = metrics.f1_score(ys[fr:to], ps[fr:to] >= ts, average='micro', zero_division=1)
        print(f1s)
        

In [891]:
def scan_thr_pars():
    max_f1 = -1
    max_ts = {}
    step = 0.1
    for l in np.arange(0, 1, step):
        for m in np.arange(l + step, 1, step):
            for h in np.arange(m + step, 1, step):
                ts = { 'high': h, 'median': m, 'low': l}
                fs = get_global_f1(ps, ys, ts, combined=True)
                if fs > max_f1:
                    max_f1 = fs
                    max_ts = ts
    return max_f1, max_ts

In [892]:
ms, mt = scan_thr_pars()
ms, mt

(0.7215068880852455, {'high': 0.9, 'median': 0.6000000000000001, 'low': 0.1})

In [569]:
get_individual_f1(ps, ys, mt)

0.0
1.0
0.24
0.8542713567839195
0.5550239234449761
0.27096774193548384
0.0
0.6666666666666666
0.038461538461538464
0.1988950276243094
0.49638554216867464
0.0
0.2619047619047619
0.0
0.0
0.05128205128205127
0.48135593220338985
0.3428571428571429
0.18309859154929575
0.0


# co-occurrence

In [881]:
chunk_nr = 4
chunk_size = 120

fr = chunk_nr * chunk_size
to = (chunk_nr + 1) * chunk_size
psc = ps[fr:to]
ysc = ys[fr:to]

In [915]:
def get_chunk_thresholds(chunk_nr, ps, thr_dict, chunk_size=120):
    fr = chunk_nr * chunk_size
    to = (chunk_nr + 1) * chunk_size
    psc = ps[fr:to]
    thresholds = np.ones_like(psc) * thr_dict['median']
    is_confident = np.sum(psc > thr_dict['high'], axis=0).astype(bool)
    thresholds[:, is_confident] = thr_dict['low']
    
    thresholds[:, np.where(occur[is_confident])[0]] = thr_dict['corr']   
    
    return thresholds

In [916]:
# def get_chunk_thresholds(chunk_nr, ps, thr_dict, chunk_size=120):
#     fr = chunk_nr * chunk_size
#     to = (chunk_nr + 1) * chunk_size
#     psc = ps[fr:to]
#     thresholds = np.ones_like(psc) * thr_dict['median']
#     is_confident = np.sum(psc > thr_dict['high'], axis=0).astype(bool)
#     thresholds[:, is_confident] = thr_dict['low']
#     return thresholds


In [917]:
occur.shape

(48, 48)

In [931]:
def scan_thr_pars():
    max_f1 = -1
    max_ts = {}
    step = 0.1
    for l in np.arange(0, 1, step):
        for m in np.arange(l + step, 1, step):
            for h in np.arange(m + step, 1.1, step):
                for c in np.arange(0, 1, step):
                    ts = { 'high': h, 'median': m, 'low': l, 'corr': c}
                    fs = get_global_f1(ps, ys, ts, combined=True)
                    if fs > max_f1:
                        max_f1 = fs
                        max_ts = ts
    return max_f1, max_ts

In [932]:
scan_thr_pars()

(0.7215889566207027,
 {'high': 0.9,
  'median': 0.6000000000000001,
  'low': 0.1,
  'corr': 0.7000000000000001})

In [923]:
scan_thr_pars()

(0.7215889566207027,
 {'high': 0.9,
  'median': 0.6000000000000001,
  'low': 0.1,
  'corr': 0.7000000000000001})

In [746]:
occur.shape

(48, 48)

In [858]:
ts, ic = get_chunk_thresholds(chunk_nr, ps, mt)

In [859]:
occur[ic].shape

(2, 48)

In [860]:
ts[2]

array([0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6,
       0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6,
       0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6,
       0.1, 0.6, 0.6, 0.6, 0.6, 0.1, 0.6, 0.6, 0.6], dtype=float32)

In [861]:
ts[:, 39]

array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1], dtype=float32)

In [862]:
np.where(ic)

(array([39, 44]),)

In [863]:
occur[44]

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0])

In [864]:
# ts -= occur[ic][0] * 0.1
# ts -= occur[ic][1] * 0.1

In [865]:
# ts[:, occur[ic][0]] = 0.5

In [866]:
ts[:, np.where(occur[ic])[0]] = 0.1

In [867]:
ts

array([[0.1, 0.1, 0.6, ..., 0.6, 0.6, 0.6],
       [0.1, 0.1, 0.6, ..., 0.6, 0.6, 0.6],
       [0.1, 0.1, 0.6, ..., 0.6, 0.6, 0.6],
       ...,
       [0.1, 0.1, 0.6, ..., 0.6, 0.6, 0.6],
       [0.1, 0.1, 0.6, ..., 0.6, 0.6, 0.6],
       [0.1, 0.1, 0.6, ..., 0.6, 0.6, 0.6]], dtype=float32)

In [868]:
compute_f1(psc, ysc, ts, combined=True)

0.7798971684803697

# no_call f1 score

In [547]:
np.abs(ys - 1)

array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]])

In [545]:
ps

array([[0.02217756, 0.03169936, 0.02676164, ..., 0.06467676, 0.07248168,
        0.06468667],
       [0.0087122 , 0.00517282, 0.00659248, ..., 0.01830112, 0.01274671,
        0.01552435],
       [0.01010807, 0.02155799, 0.01060296, ..., 0.02309404, 0.01885087,
        0.01700381],
       ...,
       [0.25503898, 0.00770985, 0.02734323, ..., 0.01818618, 0.01902353,
        0.00288344],
       [0.3078698 , 0.01508887, 0.02901221, ..., 0.02912963, 0.02364811,
        0.00442752],
       [0.12785846, 0.03092269, 0.07600214, ..., 0.0468911 , 0.00928991,
        0.00314113]], dtype=float32)

In [557]:
no_call_f1s = metrics.f1_score(np.abs(ys - 1), ps < get_thresholds(ps, mt), average='micro')

In [558]:
f1s = get_global_f1(ps, mt)

In [559]:
no_call_f1s * 0.54 + f1s * 0.46

0.7215068880852455

# package

In [934]:
from lark.post import CoOccurrence

In [939]:
cooc = CoOccurrence(None)

In [940]:
cooc.save("data/co-occurrence-complete.npy")

In [943]:
type(cooc.matrix)

numpy.ndarray

In [942]:
np.load("data/co-occurrence-complete.npy")

array([[0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       ...,
       [0, 1, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 1, 0]])

In [979]:
cooc_cor_ssw = CoOccurrence(['SSW', 'COR'])

In [982]:
cooc_cor_ssw.matrix.shape

(48, 48)

# kaggle submit-04

In [944]:
cfg = Config(sites=None)

In [970]:
prep = MixedSig2Spec(cfg, rank)
main_model = timm.create_model('tf_efficientnet_b0_ns', pretrained=True)
main_model.classifier = torch.nn.Sequential(torch.nn.Linear(in_features=1280, out_features=len(cfg.labels), bias=True))
model = torch.nn.Sequential(prep, main_model)
model = model.to(rank)

In [971]:
lrn = Learner("tf_efficientnet_b0_ns-full", cfg, rank, model)

tf_efficientnet_b0_ns-full-20210605-085736:0


In [972]:
cp_name = f"tf_efficientnet_b0_ns-full-20210525-181231-latest"

In [973]:
lrn.load_checkpoint("", cp_name)

{'epoch': 11,
 'valid_loss': 0.0011728350705622385,
 'valid_score': 0.11879698932170868}

In [974]:
from lark.post import PostProcessing
pp = PostProcessing(cfg, lrn.tdl.dataset)

In [975]:
ps, ys = lrn.validation_inference()

100%|██████████| 75/75 [00:04<00:00, 17.69it/s]


In [969]:
pp.scan_thr_pars(ps, ys)

(0.6584693276830287,
 {'high': 0.5000000000000001,
  'median': 0.4000000000000001,
  'low': 0.1,
  'corr': 0.30000000000000004})