# 🐦 BirdCLEF '21 -2nd place model - Submit [0.66]
## [BirdCLEF 2022](https://www.kaggle.com/c/birdclef-2022)
### Identify bird calls in soundscapes
![](https://storage.googleapis.com/kaggle-competitions/kaggle/33246/logos/header.png)

# BirdCLEF 2021 - 2nd place model - Submit [LB:0.66]

This is one of the models from the 2nd place solution ensemble of the BirdCLEF 2021 competition, adapted to BirdCLEF 2022, using only the 21 relevant classes.

# Training notebook: [🐦 BirdCLEF '21 - 2nd place model - Train [0.66]](https://www.kaggle.com/code/julian3833/birdclef-2021-2nd-place-model-train-lb-0-66)

It uses MEL spectrograms, 5 secs chunking, GeM and a Resnet.

* Check the write-up by the original authors for more details: https://www.kaggle.com/c/birdclef-2021/discussion/243463
* Also their paper: http://ceur-ws.org/Vol-2936/paper-134.pdf 
* and their GitHub repository, from which I took the original code that I adapted: https://github.com/ChristofHenkel/kaggle-birdclef2021-2nd-place

# Please, _DO_ upvote if you found this notebook useful or interesting!

In [1]:
!pip install ../input/birds-inference-pip-wheels/torchaudio-0.8.1-cp37-cp37m-manylinux1_x86_64.whl ../input/birds-inference-pip-wheels/torch-1.8.1-cp37-cp37m-manylinux1_x86_64.whl
!pip install ../input/birds-inference-pip-wheels/timm-0.4.8.zip --no-index --no-deps
#!pip install ../input/birdclef21trainmeta/timm-0.4.9_23052021/pytorch-image-models-master --no-index --no-deps
!pip install ../input/birds-inference-pip-wheels/audiomentations-0.16.0-py3-none-any.whl --no-index --no-deps
!pip install ../input/birds-inference-pip-wheels/torchlibrosa-0.0.9-py3-none-any.whl --no-index --no-deps

Processing /kaggle/input/birds-inference-pip-wheels/torchaudio-0.8.1-cp37-cp37m-manylinux1_x86_64.whl
Processing /kaggle/input/birds-inference-pip-wheels/torch-1.8.1-cp37-cp37m-manylinux1_x86_64.whl
Installing collected packages: torch, torchaudio
  Attempting uninstall: torch
    Found existing installation: torch 1.9.1
    Uninstalling torch-1.9.1:
      Successfully uninstalled torch-1.9.1
  Attempting uninstall: torchaudio
    Found existing installation: torchaudio 0.9.1
    Uninstalling torchaudio-0.9.1:
      Successfully uninstalled torchaudio-0.9.1
Successfully installed torch-1.8.1 torchaudio-0.8.1
Processing /kaggle/input/birds-inference-pip-wheels/timm-0.4.8.zip
  Preparing metadata (setup.py) ... [?25l- done
[?25hBuilding wheels for collected packages: timm
  Building wheel for timm (setup.py) ... [?25l- \ | done
[?25h  Created wheel for timm: filename=timm-0.4.8-py3-none-any.whl size=344972 sha256=dda43a4c5bb7608e058eee1c4ccac3a688b388d75bf4752

In [2]:
import timm
timm.__version__

'0.4.8'

In [3]:
import sys
import os
import importlib
import multiprocessing as mp

from tqdm import tqdm
import numpy as np
import pandas as pd
import glob
import torch
from copy import copy

from torch.utils.data import DataLoader

import pandas as pd
import timm
from torch import nn
import torch
import torchaudio as ta
from torch.cuda.amp import autocast
import random

from torch.nn import functional as F
from torch.distributions import Beta
from torch.nn.parameter import Parameter
from torch.utils.data import Dataset

import numpy as np
import librosa
import ast

import os
from types import SimpleNamespace
import numpy as np

import numpy as np
import pandas as pd
import importlib
import sys
import random
from tqdm import tqdm
import gc
import argparse
import torch
from torch import optim
from torch.cuda.amp import GradScaler, autocast
from collections import defaultdict
import cv2
from copy import copy
import os
from transformers import get_cosine_schedule_with_warmup
from torch.utils.data import SequentialSampler, DataLoader


In [4]:
def set_seed(seed=1234):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also exports ``PYTHONHASHSEED`` and configures cuDNN with
    ``deterministic=False`` / ``benchmark=True``.

    Args:
        seed: Integer seed shared by every generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Every generator receives the same seed value.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = False
    cudnn.benchmark = True

# Config

In [5]:
# Single mutable namespace holding every setting read by the dataset and the model.
# Several values set here are assigned again further down in this cell; the later
# assignment is the one that takes effect.
cfg = SimpleNamespace()

# paths
cfg.data_folder = ''
cfg.name = "julian"
cfg.data_dir = "../input/birdclef-2022/"
cfg.train_data_folder = cfg.data_dir + "train_audio/"
cfg.val_data_folder = cfg.data_dir + "train_audio/"  # re-pointed to the test soundscapes in a later cell
cfg.output_dir = "first_model"

# dataset
cfg.dataset = "base_ds"
cfg.min_rating = 0  # reassigned to 2.0 later in this cell
cfg.val_df = None
cfg.batch_size_val = 1  # reassigned to 64 later in this cell
cfg.train_aug = None
cfg.val_aug = None
cfg.test_augs = None
cfg.wav_len_val = 5  # seconds

# audio (passed to the MelSpectrogram / AmplitudeToDB transforms built in Net)
cfg.window_size = 2048  # used as both n_fft and win_length
cfg.hop_size = 512
cfg.sample_rate = 32000
cfg.fmin = 16
cfg.fmax = 16386
cfg.power = 2
cfg.mel_bins = 256
cfg.top_db = 80.0

# img model
cfg.backbone = "resnet18"  # reassigned to "resnet34" later in this cell
cfg.pretrained = True
cfg.pretrained_weights = None
cfg.train = True
cfg.val = False
cfg.in_chans = 1

cfg.alpha = 1
cfg.eval_epochs = 1
cfg.eval_train_epochs = 1
cfg.warmup = 0

# When True, Net.forward rescales the dB spectrogram with (x + 80) / 80.
cfg.mel_norm = False

cfg.label_smoothing = 0

cfg.remove_pretrained = []

# training
cfg.seed = 123
cfg.save_val_data = True

# resources
cfg.mixed_precision = True
cfg.gpu = 0
cfg.num_workers = 4 # 18
cfg.drop_last = True 

# When truthy, Net.forward applies a second mixup pass during training.
cfg.mixup2 = 0

cfg.label_smoothing = 0  # same value as assigned above

cfg.mixup_2x = False


# Class vocabulary. The position of a code in this array is the class index
# (CustomDataset builds its bird -> index mapping by enumerating it).
cfg.birds = np.array(['afrsil1', 'akekee', 'akepa1', 'akiapo', 'akikik', 'amewig',
       'aniani', 'apapan', 'arcter', 'barpet', 'bcnher', 'belkin1',
       'bkbplo', 'bknsti', 'bkwpet', 'blkfra', 'blknod', 'bongul',
       'brant', 'brnboo', 'brnnod', 'brnowl', 'brtcur', 'bubsan',
       'buffle', 'bulpet', 'burpar', 'buwtea', 'cacgoo1', 'calqua',
       'cangoo', 'canvas', 'caster1', 'categr', 'chbsan', 'chemun',
       'chukar', 'cintea', 'comgal1', 'commyn', 'compea', 'comsan',
       'comwax', 'coopet', 'crehon', 'dunlin', 'elepai', 'ercfra',
       'eurwig', 'fragul', 'gadwal', 'gamqua', 'glwgul', 'gnwtea',
       'golphe', 'grbher3', 'grefri', 'gresca', 'gryfra', 'gwfgoo',
       'hawama', 'hawcoo', 'hawcre', 'hawgoo', 'hawhaw', 'hawpet1',
       'hoomer', 'houfin', 'houspa', 'hudgod', 'iiwi', 'incter1',
       'jabwar', 'japqua', 'kalphe', 'kauama', 'laugul', 'layalb',
       'lcspet', 'leasan', 'leater1', 'lessca', 'lesyel', 'lobdow',
       'lotjae', 'madpet', 'magpet1', 'mallar3', 'masboo', 'mauala',
       'maupar', 'merlin', 'mitpar', 'moudov', 'norcar', 'norhar2',
       'normoc', 'norpin', 'norsho', 'nutman', 'oahama', 'omao', 'osprey',
       'pagplo', 'palila', 'parjae', 'pecsan', 'peflov', 'perfal',
       'pibgre', 'pomjae', 'puaioh', 'reccar', 'redava', 'redjun',
       'redpha1', 'refboo', 'rempar', 'rettro', 'ribgul', 'rinduc',
       'rinphe', 'rocpig', 'rorpar', 'rudtur', 'ruff', 'saffin', 'sander',
       'semplo', 'sheowl', 'shtsan', 'skylar', 'snogoo', 'sooshe',
       'sooter1', 'sopsku1', 'sora', 'spodov', 'sposan', 'towsol',
       'wantat1', 'warwhe1', 'wesmea', 'wessan', 'wetshe', 'whfibi',
       'whiter', 'whttro', 'wiltur', 'yebcar', 'yefcan', 'zebdov'])


# Size of the model's output layer (Net builds its head with this many units).
cfg.n_classes = len(cfg.birds)
# dataset
# Replaces the 0 assigned earlier in this cell; train-mode CustomDataset keeps rows with rating >= this.
cfg.min_rating = 2.0

# Length of a training crop; Net splits it into int(wav_crop_len / 5.0) segments.
cfg.wav_crop_len = 30  # seconds

cfg.lr = 0.0001
cfg.epochs = 5
cfg.batch_size = 64
cfg.batch_size_val = 64  # replaces the 1 assigned earlier in this cell
cfg.backbone = "resnet34"  # replaces "resnet18" assigned earlier in this cell


cfg.save_val_data = True
cfg.mixed_precision = True

# Mixup is only applied inside Net.forward when the module is in training mode.
cfg.mixup = True
cfg.mix_beta = 1  # both parameters of the Beta distribution used by Mixup


cfg.train_df1 = "../input/birdclef-2022/train_metadata.csv"
cfg.train_df2 = "../input/birdclef-2022-df-train-with-durations/df-with-durations.csv"


cfg.device = 'cuda' if torch.cuda.is_available() else 'cpu'

cfg.tr_collate_fn = None
cfg.val_collate_fn = None
cfg.val = False

# When True, CustomDataset.__len__ reports a fixed length of 256.
cfg.dev = False

cfg.model = "RN34"

# Bare expression: displays the resulting namespace as the cell output.
cfg

namespace(data_folder='',
          name='julian',
          data_dir='../input/birdclef-2022/',
          train_data_folder='../input/birdclef-2022/train_audio/',
          val_data_folder='../input/birdclef-2022/train_audio/',
          output_dir='first_model',
          dataset='base_ds',
          min_rating=2.0,
          val_df=None,
          batch_size_val=64,
          train_aug=None,
          val_aug=None,
          test_augs=None,
          wav_len_val=5,
          window_size=2048,
          hop_size=512,
          sample_rate=32000,
          fmin=16,
          fmax=16386,
          power=2,
          mel_bins=256,
          top_db=80.0,
          backbone='resnet34',
          pretrained=True,
          pretrained_weights=None,
          train=True,
          val=False,
          in_chans=1,
          alpha=1,
          eval_epochs=1,
          eval_train_epochs=1,
          warmup=0,
          mel_norm=False,
          label_smoothing=0,
          remove_pretrained=[],

In [6]:
# Leftover from the original repository, where the config was imported from a module:
# cfg = importlib.import_module('default_config')
# importlib.reload(cfg)
# cfg = importlib.import_module('cfg_ps_6_v2')
# importlib.reload(cfg)
# cfg = copy(cfg.cfg)

# Inference-time overrides: "val"-mode datasets read audio from cfg.val_data_folder,
# so point it at the test soundscapes.
TEST_AUDIO_ROOT = "../input/birdclef-2022/test_soundscapes/"
cfg.val_data_folder = TEST_AUDIO_ROOT
# Do not ask timm for pretrained backbone weights; a full checkpoint is loaded later in the notebook.
cfg.pretrained = False


print(cfg.model, cfg.dataset, cfg.backbone, cfg.pretrained_weights, cfg.mel_norm)


RN34 base_ds resnet34 None False


In [7]:
def batch_to_device(batch, device):
    """Return a new dict mapping each key of ``batch`` to its value moved to ``device``.

    Every value must expose a ``.to(device)`` method.
    """
    moved = {}
    for name in batch:
        moved[name] = batch[name].to(device)
    return moved



class CustomDataset(Dataset):
    """Audio dataset yielding raw waveforms and multi-hot species targets.

    Modes:
        * ``"train"``: one item per metadata row; a random crop of
          ``cfg.wav_crop_len`` seconds is loaded and rows whose ``rating`` is
          below ``cfg.min_rating`` are discarded.
        * any other mode (``"val"`` / ``"test"``): one item per unique
          ``filename``; the whole file is loaded and split into as many
          equal parts as the file has rows in ``df``.

    Args:
        df: Metadata frame. Train mode reads the columns ``filename``,
            ``rating``, ``primary_label``, ``secondary_labels`` and
            ``duration``; other modes read ``filename`` and ``birds``
            (an iterable of species codes per row).
        cfg: Namespace providing ``birds``, ``n_classes``, ``sample_rate``,
            ``wav_crop_len``, ``min_rating``, ``train_aug``, ``val_aug``,
            the ``*_data_folder`` matching ``mode`` and, optionally, ``dev``.
        aug: Accepted for interface compatibility.
        mode: ``"train"``, ``"val"`` or ``"test"``.
    """

    def __init__(self, df, cfg, aug, mode="train"):

        self.cfg = cfg
        self.mode = mode
        self.df = df.copy()

        self.bird2id = {bird: idx for idx, bird in enumerate(cfg.birds)}
        if self.mode == "train":
            self.data_folder = cfg.train_data_folder
            self.df = self.df[self.df["rating"] >= self.cfg.min_rating]
        elif self.mode == "val":
            self.data_folder = cfg.val_data_folder
        elif self.mode == "test":
            self.data_folder = cfg.test_data_folder

        self.fns = self.df["filename"].unique()

        self.df = self.setup_df()

        # NOTE(review): the `aug` argument is not used; the training augmentation
        # is always taken from cfg.train_aug. Confirm whether that is intended.
        self.aug_audio = cfg.train_aug

    def setup_df(self):
        """Attach one ``t{i}`` column per class holding the multi-hot target.

        Returns:
            The augmented DataFrame in train mode, otherwise the same frame
            grouped by ``filename``.
        """
        df = self.df.copy()
        label_cols = [f"t{i}" for i in range(self.cfg.n_classes)]

        if self.mode == "train":

            df["weight"] = np.clip(df["rating"] / df["rating"].max(), 0.1, 1.0)
            df['target'] = df['primary_label'].apply(self.bird2id.get)
            labels = np.eye(self.cfg.n_classes)[df["target"].astype(int).values]
            label2 = df["secondary_labels"].apply(self.secondary2target).values
            for i, t in enumerate(label2):
                labels[i, t] = 1
        else:
            targets = df["birds"].apply(self.birds2target).values
            labels = np.zeros((df.shape[0], self.cfg.n_classes))
            for i, t in enumerate(targets):
                labels[i, t] = 1

        # Add all label columns with a single concat instead of a multi-column
        # assignment, which inserts them one by one and fragments the frame.
        df = df.drop(columns=label_cols, errors="ignore")
        label_df = pd.DataFrame(labels, columns=label_cols, index=df.index)
        df = pd.concat([df, label_df], axis=1)

        if self.mode != "train":
            df = df.groupby("filename")

        return df

    def __getitem__(self, idx):
        """Load one item.

        Returns:
            dict with ``input`` (waveform tensor; 2-D ``(parts, samples)``
            when the file has more than one row), ``target`` (float32
            multi-hot labels), ``weight`` and ``fold`` (always -1).
        """
        label_cols = [f"t{i}" for i in range(self.cfg.n_classes)]
        fold = -1

        if self.mode == "train":
            row = self.df.iloc[idx]
            fn = row["filename"]
            label = row[label_cols].values
            weight = row["weight"]
            parts = 1

            # Random crop start, in whole seconds, such that the crop fits in
            # the recording whenever the recording is long enough.
            wav_len_sec = row['duration']
            duration = self.cfg.wav_crop_len
            max_offset = max(wav_len_sec - duration, 1)
            offset = np.random.randint(max_offset)
        else:
            fn = self.fns[idx]
            row = self.df.get_group(fn)
            label = row[label_cols].values
            # (translated from Spanish) This is my "entry point" for a
            # recording being long: one part per metadata row of the file.
            parts = label.shape[0]
            weight = 1
            offset = 0.0
            duration = None

        wav = self.load_one(fn, offset, duration)

        # Right-pad with zeros up to the training crop length.
        min_samples = self.cfg.wav_crop_len * self.cfg.sample_rate
        if wav.shape[0] < min_samples:
            wav = np.pad(wav, (0, min_samples - wav.shape[0]))

        if self.mode == "train":
            if self.aug_audio:
                wav = self.aug_audio(samples=wav, sample_rate=self.cfg.sample_rate)
        else:
            if self.cfg.val_aug:
                wav = self.cfg.val_aug(samples=wav, sample_rate=self.cfg.sample_rate)

        wav_tensor = torch.tensor(wav)  # (n_samples)
        if parts > 1:
            # Drop the remainder so the waveform splits into equal parts.
            n_samples = wav_tensor.shape[0]
            wav_tensor = wav_tensor[: n_samples // parts * parts].reshape(
                parts, n_samples // parts
            )

        feature_dict = {
            "input": wav_tensor,
            "target": torch.tensor(label.astype(np.float32)),
            "weight": torch.tensor(weight),
            "fold": torch.tensor(fold),
        }
        return feature_dict

    def __len__(self):
        """Number of unique filenames, or 256 when ``cfg.dev`` is truthy."""
        # Read the flag from this dataset's own config rather than from a
        # module-level `cfg` that may not exist or may be a different object.
        if getattr(self.cfg, "dev", False):
            return 256
        return len(self.fns)

    def load_one(self, id_, offset, duration):
        """Read ``duration`` seconds of ``data_folder + id_`` starting at ``offset``.

        The file's native sample rate is kept (``sr=None``).

        Raises:
            Exception: whatever ``librosa.load`` raised, after logging the path.
        """
        fp = self.data_folder + id_
        try:
            wav, _ = librosa.load(fp, sr=None, offset=offset, duration=duration)
        except Exception:
            # Log which file failed, then surface the real error instead of
            # failing later with an unrelated UnboundLocalError.
            print("FAIL READING rec", fp)
            raise

        return wav

    def birds2target(self, birds):
        """Map an iterable of species codes to class indices.

        ``"nocall"`` and codes that are not in ``cfg.birds`` are skipped.
        """
        #birds = birds.split()
        return [self.bird2id[item] for item in birds if item != "nocall" and item in self.bird2id]

    def secondary2target(self, secondary_label):
        """Parse a stringified Python list of species codes into class indices.

        ``"nocall"`` and codes that are not in ``cfg.birds`` are skipped.
        """
        birds = ast.literal_eval(secondary_label)
        return [self.bird2id[item] for item in birds if item != "nocall" and item in self.bird2id]


In [8]:
def gem(x, p=3, eps=1e-6):
    """Generalized-mean pooling over the last two dimensions of ``x``.

    Values are clamped to at least ``eps``, raised to the power ``p``,
    averaged over a window covering the full last two dimensions, and the
    ``p``-th root of that average is returned.
    """
    window = (x.size(-2), x.size(-1))
    powered = x.clamp(min=eps).pow(p)
    averaged = F.avg_pool2d(powered, window)
    return averaged.pow(1.0 / p)


class GeM(nn.Module):
    """Pooling layer wrapping ``gem`` with a learnable exponent ``p``.

    Generalized mean: https://arxiv.org/abs/1711.02512
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        # One-element parameter so the exponent is trained with the network.
        self.p = Parameter(torch.ones(1) * p)

    def forward(self, x):
        return gem(x, p=self.p, eps=self.eps)

    def __repr__(self):
        p_value = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})"


class Mixup(nn.Module):
    """Mixup augmentation: blend every sample with a randomly chosen partner.

    A coefficient is drawn per sample from ``Beta(mix_beta, mix_beta)`` and
    the inputs, targets and (optionally) sample weights are mixed with the
    same coefficient and the same permutation of the batch.

    Args:
        mix_beta: Both concentration parameters of the Beta distribution.
    """

    def __init__(self, mix_beta):

        super(Mixup, self).__init__()
        self.beta_distribution = Beta(mix_beta, mix_beta)

    def forward(self, X, Y, weight=None):
        """Mix a batch.

        Args:
            X: Inputs with the batch on dimension 0 and any number of
                trailing dimensions.
            Y: Targets of shape ``(batch, n_classes)``.
            weight: Optional per-sample weights of shape ``(batch,)``.

        Returns:
            ``(X, Y)`` when ``weight`` is None, otherwise ``(X, Y, weight)``.
        """
        bs = X.shape[0]
        perm = torch.randperm(bs)
        coeffs = self.beta_distribution.rsample(torch.Size((bs,))).to(X.device)

        # Shape the coefficients as (bs, 1, ..., 1) with as many trailing
        # singleton dims as X has, so they broadcast for inputs of any rank
        # (not only the 2-, 3- and 4-dimensional cases).
        x_coeffs = coeffs.view(-1, *([1] * (X.dim() - 1)))
        X = x_coeffs * X + (1 - x_coeffs) * X[perm]

        Y = coeffs.view(-1, 1) * Y + (1 - coeffs.view(-1, 1)) * Y[perm]

        if weight is None:
            return X, Y

        weight = coeffs.view(-1) * weight + (1 - coeffs.view(-1)) * weight[perm]
        return X, Y, weight

        
        
class Net(nn.Module):
    """Waveform classifier: mel spectrogram -> timm backbone -> GeM pooling -> linear head.

    ``forward`` takes the batch dict produced by the dataset (keys ``input``,
    ``target``, ``weight``) and returns the loss together with the predictions.
    """

    def __init__(self, cfg):
        """Build the spectrogram front-end, backbone, pooling layer and head from ``cfg``."""
        super(Net, self).__init__()

        self.cfg = cfg

        self.n_classes = cfg.n_classes

        self.mel_spec = ta.transforms.MelSpectrogram(
            sample_rate=cfg.sample_rate,
            n_fft=cfg.window_size,
            win_length=cfg.window_size,
            hop_length=cfg.hop_size,
            f_min=cfg.fmin,
            f_max=cfg.fmax,
            pad=0,
            n_mels=cfg.mel_bins,
            power=cfg.power,
            normalized=False,
        )

        self.amplitude_to_db = ta.transforms.AmplitudeToDB(top_db=cfg.top_db)
        # Waveform -> mel spectrogram -> dB scale.
        self.wav2img = torch.nn.Sequential(self.mel_spec, self.amplitude_to_db)

        # num_classes=0 and global_pool="" are passed so that pooling and
        # classification are done by the GeM layer and linear head below.
        self.backbone = timm.create_model(
            cfg.backbone,
            pretrained=cfg.pretrained,
            num_classes=0,
            global_pool="",
            in_chans=cfg.in_chans,
        )

        if "efficientnet" in cfg.backbone:
            backbone_out = self.backbone.num_features
        else:
            backbone_out = self.backbone.feature_info[-1]["num_chs"]

        self.global_pool = GeM()

        self.head = nn.Linear(backbone_out, self.n_classes)

        if cfg.pretrained_weights is not None:
            # The checkpoint stores the weights under "model"; any "module."
            # substring is removed from the parameter names before loading.
            sd = torch.load(cfg.pretrained_weights, map_location="cpu")["model"]
            sd = {k.replace("module.", ""): v for k, v in sd.items()}
            self.load_state_dict(sd, strict=True)
            print("weights loaded from", cfg.pretrained_weights)
        self.loss_fn = nn.BCEWithLogitsLoss(reduction="none")

        self.mixup = Mixup(mix_beta=cfg.mix_beta)

        # Number of segments each training crop is split into (crop length / 5.0).
        self.factor = int(cfg.wav_crop_len / 5.0)

    def forward(self, batch):
        """Compute the loss and predictions for one batch.

        Returns:
            dict with ``loss`` (scalar), ``logits`` (sigmoid of the head
            output, i.e. probabilities despite the key name), ``logits_raw``
            (head output) and ``target``.
        """

        if not self.training:
            # Eval: input is (bs, parts, time). The reshape to (parts, time)
            # only holds when bs == 1; the targets of the first item are used.
            x = batch["input"]
            bs, parts, time = x.shape
            x = x.reshape(parts, time)
            y = batch["target"]
            y = y[0]
        else:
            # Train: split every crop into `factor` consecutive segments.
            x = batch["input"]
            y = batch["target"]
            bs, time = x.shape
            x = x.reshape(bs * self.factor, time // self.factor)

        # The spectrogram is computed with autocast disabled.
        with autocast(enabled=False):
            x = self.wav2img(x)  # (bs, mel, time)
            if self.cfg.mel_norm:
                x = (x + 80) / 80

        # Swap the last two axes and insert a channel axis at position 1.
        x = x.permute(0, 2, 1)
        x = x[:, None, :, :]

        weight = batch["weight"]

        if self.training:
            # Concatenate the `factor` segments of each crop along the time
            # axis so that mixup blends whole crops, then split them again.
            b, c, t, f = x.shape
            x = x.permute(0, 2, 1, 3)
            x = x.reshape(b // self.factor, self.factor * t, c, f)

            if self.cfg.mixup:
                x, y, weight = self.mixup(x, y, weight)
            if self.cfg.mixup2:
                x, y, weight = self.mixup(x, y, weight)

            x = x.reshape(b, t, c, f)
            x = x.permute(0, 2, 1, 3)

        x = self.backbone(x)

        if self.training:
            # Re-join the segments of each crop in feature space so pooling
            # produces one vector per crop.
            b, c, t, f = x.shape
            x = x.permute(0, 2, 1, 3)
            x = x.reshape(b // self.factor, self.factor * t, c, f)
            x = x.permute(0, 2, 1, 3)
        x = self.global_pool(x)
        x = x[:, :, 0, 0]
        logits = self.head(x)

        # Per-sample BCE averaged over classes, then a weight-normalized sum.
        loss = self.loss_fn(logits, y)
        loss = (loss.mean(dim=1) * weight) / weight.sum()
        loss = loss.sum()

        return {"loss": loss, "logits": logits.sigmoid(), "logits_raw": logits, "target": y}


In [9]:
def get_state_dict(sd_fp):
    """Load the ``'model'`` entry of the checkpoint at ``sd_fp`` onto the CPU.

    Every occurrence of ``"module."`` is removed from the parameter names.

    Returns:
        dict mapping cleaned parameter names to their stored values.
    """
    checkpoint = torch.load(sd_fp, map_location="cpu")
    cleaned = {}
    for name, value in checkpoint['model'].items():
        cleaned[name.replace("module.", "")] = value
    return cleaned

from scipy.stats.mstats import gmean

In [10]:
# Exploratory: load one test soundscape to inspect its length.
# NOTE(review): no `sr` argument is passed, so librosa's default target sample
# rate applies — presumably the audio is resampled; confirm in the librosa docs.
wv, sr = librosa.load("../input/birdclef-2022/test_soundscapes/soundscape_453028782.ogg")

In [11]:
import math
# 1-based indices of the 5-second chunks needed to cover the clip
# (duration in seconds = len(wv) / sr, rounded up to a whole number of chunks).
list(range(1, math.ceil(((len(wv)) / sr) / 5)+1))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

In [12]:
import json

# Folder containing the test soundscapes (same path as TEST_AUDIO_ROOT defined earlier).
TEST_AUDIO_PATH = '../input/birdclef-2022/test_soundscapes/'

# Species codes loaded from the competition's scored_birds.json; used below to
# select the prediction columns that go into the submission.
with open('../input/birdclef-2022/scored_birds.json') as fp:
    SCORED_BIRDS = json.load(fp)

In [13]:
def create_df_test_from_path():
    """Build one metadata row per 5-second chunk of every file in ``TEST_AUDIO_PATH``.

    Returns:
        DataFrame with the columns ``filename``, ``row_prefix`` (file name
        without its extension), ``ending_second`` (5, 10, ...) and ``birds``.
        ``birds`` is a one-element list holding ``SCORED_BIRDS[0]``; it is
        only a placeholder so the frame has the column the dataset reads.
    """
    records = []
    for filename in sorted(os.listdir(TEST_AUDIO_PATH)):
        # sr=None keeps the file's native sample rate: the duration
        # (len / sr) is the same, but the resampling pass is skipped.
        wav, sr = librosa.load(TEST_AUDIO_PATH + filename, sr=None)
        n_chunks = math.ceil(len(wav) / sr / 5)

        # splitext handles extensions of any length, unlike slicing off 4 chars.
        row_prefix = os.path.splitext(filename)[0]
        placeholder_bird = SCORED_BIRDS[0]

        for chunk in range(1, n_chunks + 1):
            ending_second = chunk * 5
            records.append((filename, row_prefix, ending_second, [placeholder_bird]))

    return pd.DataFrame(records, columns=['filename', 'row_prefix', 'ending_second', 'birds'])
        
# One row per (test file, 5-second chunk); later cells overwrite `test_df` in place.
test_df = create_df_test_from_path()

In [14]:
# Sanity check: number of chunk rows and the first few of them.
print(test_df.shape)
test_df.head()

(12, 4)


Unnamed: 0,filename,row_prefix,ending_second,birds
0,soundscape_453028782.ogg,soundscape_453028782,5,[akiapo]
1,soundscape_453028782.ogg,soundscape_453028782,10,[akiapo]
2,soundscape_453028782.ogg,soundscape_453028782,15,[akiapo]
3,soundscape_453028782.ogg,soundscape_453028782,20,[akiapo]
4,soundscape_453028782.ogg,soundscape_453028782,25,[akiapo]


In [15]:
# Last chunk rows, to check the final `ending_second` values.
test_df.tail()

Unnamed: 0,filename,row_prefix,ending_second,birds
7,soundscape_453028782.ogg,soundscape_453028782,40,[akiapo]
8,soundscape_453028782.ogg,soundscape_453028782,45,[akiapo]
9,soundscape_453028782.ogg,soundscape_453028782,50,[akiapo]
10,soundscape_453028782.ogg,soundscape_453028782,55,[akiapo]
11,soundscape_453028782.ogg,soundscape_453028782,60,[akiapo]


In [16]:
N_CORES = 4  # worker processes used by the DataLoader
# Net.forward's eval branch reshapes (bs, parts, time) to (parts, time), which needs bs == 1.
cfg.batch_size = 1

aug = None
# "val" mode: one item per file, read from cfg.val_data_folder and split into one part per row.
test_ds = CustomDataset(test_df, cfg, aug, mode="val")
test_dl = DataLoader(test_ds, shuffle=False, batch_size = cfg.batch_size, num_workers = N_CORES)

# Display the first item as a quick check of the produced tensors.
test_ds[0]

  self[col] = igetitem(value, i)
  cpuset_checked))


{'input': tensor([[ 0.1307,  0.1192,  0.1069,  ...,  0.1011,  0.1114,  0.1204],
         [ 0.1141,  0.1103,  0.1133,  ..., -0.0515, -0.0519, -0.0415],
         [-0.0337, -0.0339, -0.0403,  ...,  0.1231,  0.1273,  0.1231],
         ...,
         [ 0.1044,  0.1021,  0.1125,  ...,  0.1141,  0.1089,  0.1174],
         [ 0.1186,  0.1055,  0.1018,  ...,  0.1339,  0.1321,  0.1370],
         [ 0.1367,  0.1420,  0.1431,  ...,  0.0104,  0.0234,  0.0339]]),
 'target': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 'weight': tensor(1),
 'fold': tensor(-1)}

In [17]:
# Device used for inference (computed the same way as cfg.device).
DEVICE = "cuda" if torch.cuda.is_available() else 'cpu'

In [18]:
# Alternative checkpoint locations tried previously:
#state_dict = "../input/mel-gem-resnet-from-2021-2nd-place/first_model/checkpoint_last_seed123.pth"
#state_dict = "../input/mel-gem-resnet/first_model/checkpoint_last_seed123.pth"
# NOTE: despite its name, `state_dict` holds the checkpoint *path*; the weights themselves are `sd`.
state_dict = "../input/mel-gem-resnet-from-2021-2nd-place/first_model/checkpoint_last_seed123.pth"
cfg.backbone = "resnet34"
# Build the model in eval mode, then load every weight from the checkpoint
# (strict=True fails on any missing or unexpected key).
net = Net(cfg).eval().to(DEVICE)
sd = get_state_dict(state_dict)
print("loading dict")
net.load_state_dict(sd, strict=True)


loading dict


<All keys matched successfully>

In [19]:
# Show the GeM exponent `p` loaded from the checkpoint (GeM initialises it to 3).
list(net.global_pool.parameters())

[Parameter containing:
 tensor([3.0209], device='cuda:0', requires_grad=True)]

In [20]:
def flatten(l):
    """Concatenate the items of every iterable in ``l`` into one flat list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [21]:
# Run the model over every test file without tracking gradients and collect
# one array of per-chunk predictions per file.
with torch.no_grad():
    preds = []
    for batch in tqdm(test_dl):
        batch = batch_to_device(batch, DEVICE)
        with torch.cuda.amp.autocast():
            # Despite the key name, 'logits' holds sigmoid outputs (see Net.forward).
            out = net(batch)['logits']
            preds += [out.cpu().numpy()]

100%|██████████| 1/1 [00:00<00:00,  3.05it/s]


In [22]:
#test_df = create_df_test_from_path()

In [23]:
# Stack the per-file arrays into one row per chunk, name the columns after the
# class vocabulary (bird2id preserves cfg.birds order) and keep only the scored species.
# NOTE(review): rows are later matched to test_df by position, which assumes the
# dataset yields files/chunks in the same order as test_df — confirm.
df_preds = pd.DataFrame(np.vstack(preds), columns=test_ds.bird2id.keys())[SCORED_BIRDS]
df_preds.head()

Unnamed: 0,akiapo,aniani,apapan,barpet,crehon,elepai,ercfra,hawama,hawcre,hawgoo,...,hawpet1,houfin,iiwi,jabwar,maupar,omao,puaioh,skylar,warwhe1,yefcan
0,6.9e-05,0.002581,0.001367,0.00032,0.00145,0.000343,0.000813,0.000798,3.9e-05,0.000333,...,0.000217,0.002405,0.00375,0.004505,2.4e-05,0.000631,0.000301,0.010651,0.027634,0.00088
1,0.000199,0.002157,0.001796,0.000464,0.000437,0.000315,0.000767,0.000657,0.000257,0.000469,...,0.000524,0.010612,0.005798,0.00639,4e-05,0.000744,0.000785,0.005535,0.024094,0.00104
2,0.000553,0.00452,0.003325,0.000187,0.000629,0.000707,0.001211,0.001143,0.000675,0.001346,...,0.000894,0.018402,0.009857,0.005844,0.000166,0.001274,0.000555,0.004627,0.01001,0.002522
3,0.000134,0.00321,0.001249,0.00014,0.000238,0.000866,0.00082,0.000866,0.000123,0.000479,...,0.000536,0.005322,0.003944,0.004738,8.4e-05,0.000605,0.000243,0.002924,0.014175,0.000482
4,0.000193,0.001467,0.000696,0.000178,0.000704,0.00033,0.00082,0.000741,0.000339,0.000259,...,0.000511,0.005753,0.001883,0.001748,2.9e-05,0.00104,0.000469,0.016159,0.009712,0.003172


In [24]:
# Attach the predictions by index, then reshape from wide (one column per bird)
# to long (one row per chunk and bird).
# NOTE: this overwrites `test_df` and drops its 'birds' column, so the cell
# cannot be re-run without first re-creating `test_df`.
test_df = test_df.join(df_preds).drop(['birds'], axis=1).reset_index()
test_df = pd.melt(test_df, id_vars=['filename', 'row_prefix', 'ending_second'], value_vars=SCORED_BIRDS, var_name="bird", value_name="proba")
test_df.head()

Unnamed: 0,filename,row_prefix,ending_second,bird,proba
0,soundscape_453028782.ogg,soundscape_453028782,5,akiapo,6.9e-05
1,soundscape_453028782.ogg,soundscape_453028782,10,akiapo,0.000199
2,soundscape_453028782.ogg,soundscape_453028782,15,akiapo,0.000553
3,soundscape_453028782.ogg,soundscape_453028782,20,akiapo,0.000134
4,soundscape_453028782.ogg,soundscape_453028782,25,akiapo,0.000193


In [25]:
# Submission key: "<file name without extension>_<bird>_<ending second>".
test_df['row_id'] = test_df['row_prefix'] + "_" + test_df['bird'] + "_" + test_df['ending_second'].astype(str)
test_df.head()

Unnamed: 0,filename,row_prefix,ending_second,bird,proba,row_id
0,soundscape_453028782.ogg,soundscape_453028782,5,akiapo,6.9e-05,soundscape_453028782_akiapo_5
1,soundscape_453028782.ogg,soundscape_453028782,10,akiapo,0.000199,soundscape_453028782_akiapo_10
2,soundscape_453028782.ogg,soundscape_453028782,15,akiapo,0.000553,soundscape_453028782_akiapo_15
3,soundscape_453028782.ogg,soundscape_453028782,20,akiapo,0.000134,soundscape_453028782_akiapo_20
4,soundscape_453028782.ogg,soundscape_453028782,25,akiapo,0.000193,soundscape_453028782_akiapo_25


In [26]:
# Binarise the probabilities: a bird counts as present when its score exceeds 0.012.
# NOTE(review): the 0.012 threshold is a hard-coded constant with no derivation
# shown in this notebook — presumably tuned by hand; confirm.
test_df['target'] = test_df['proba'] > 0.012
test_df.head()

Unnamed: 0,filename,row_prefix,ending_second,bird,proba,row_id,target
0,soundscape_453028782.ogg,soundscape_453028782,5,akiapo,6.9e-05,soundscape_453028782_akiapo_5,False
1,soundscape_453028782.ogg,soundscape_453028782,10,akiapo,0.000199,soundscape_453028782_akiapo_10,False
2,soundscape_453028782.ogg,soundscape_453028782,15,akiapo,0.000553,soundscape_453028782_akiapo_15,False
3,soundscape_453028782.ogg,soundscape_453028782,20,akiapo,0.000134,soundscape_453028782_akiapo_20,False
4,soundscape_453028782.ogg,soundscape_453028782,25,akiapo,0.000193,soundscape_453028782_akiapo_25,False


In [27]:
# Keep only the two submission columns and write them to submission.csv without the index.
sub = test_df[['row_id', 'target']]
sub.to_csv("submission.csv", index=False)
sub.head()

Unnamed: 0,row_id,target
0,soundscape_453028782_akiapo_5,False
1,soundscape_453028782_akiapo_10,False
2,soundscape_453028782_akiapo_15,False
3,soundscape_453028782_akiapo_20,False
4,soundscape_453028782_akiapo_25,False
