In [1]:
# Notebook setup: (re)load the autoreload extension and switch it to mode 2 so
# edits to imported project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import torch
import torchaudio as ta
import torchaudio.functional as taf
import torchaudio.transforms as tat
from torchvision import transforms

# Record the torch / torchaudio versions this run was executed with.
print(torch.__version__)        
print(ta.__version__)

import matplotlib
import matplotlib.pyplot as plt
from IPython.display import Audio, display

import pandas as pd
import os
import pprint

from typing import *
import itertools
from collections import Counter

import numpy as np
from datetime import datetime
from lark.config import Config
from lark.learner import Learner
from lark.ops import Sig2Spec, MixedSig2Spec
from lark.data import *

1.8.1
0.8.0a0+e4e171a


In [3]:
# Pin this process to CUDA device 0 and echo the active device index as the cell output.
# NOTE(review): the device index is hard-coded; this cell needs a CUDA-capable machine.
torch.cuda.set_device(0)
torch.cuda.current_device()

0

In [4]:
# List the model entry points published by the ResNeSt torch.hub repository.
# NOTE(review): force_reload=True discards the local hub cache and downloads the
# repository again on every execution, so this cell requires network access.
torch.hub.list('zhanghang1989/ResNeSt', force_reload=True)

Downloading: "https://github.com/zhanghang1989/ResNeSt/archive/master.zip" to /home/koen/.cache/torch/hub/master.zip


['resnest101',
 'resnest200',
 'resnest269',
 'resnest50',
 'resnest50_fast_1s1x64d',
 'resnest50_fast_1s2x40d',
 'resnest50_fast_1s4x24d',
 'resnest50_fast_2s1x64d',
 'resnest50_fast_2s2x40d',
 'resnest50_fast_4s1x64d',
 'resnest50_fast_4s2x40d']

In [5]:
# Experiment configuration. Only the fields overridden for this run are listed;
# everything else keeps the default declared by Config.
cfg = Config(
    # data selection and loading
    sites=['COR'],
    bs=32,
    n_workers=18,

    # spectrogram front-end parameters
    n_fft=3200,
    window_length=3200,
    hop_length=800,
    n_mels=128,

    # noise / overlay / filter options (set to 0 / False for this run)
    use_pink_noise=0,
    use_recorded_noise=0,
    use_overlays=False,
    apply_filter=0,

    # model and optimisation
    model='resnest50',
    lr=1e-3,
    n_epochs=1000,
    scheduler='torch.optim.lr_scheduler.CosineAnnealingLR',

    # logging
    use_neptune=False,
    log_batch_metrics=False,
)

In [6]:
# Show the fully resolved configuration, including fields left at their defaults.
cfg.as_dict()

{'sites': ['COR'],
 'data_dir': 'data/birdclef-2021',
 'checkpoint_dir': 'checkpoints',
 'bs': 32,
 'n_workers': 18,
 'train_duration': 5,
 'valid_duration': 5,
 'seed': 231,
 'use_pink_noise': 0,
 'use_recorded_noise': 0,
 'use_secondary_labels': False,
 'noise_nsr_dbs': [20, 10, 3],
 'pink_noise_nsr_dbs': [10, 5, 3],
 'noise_dir': 'data/noise/BirdVox-DCASE-20k/wav-32k',
 'use_overlays': False,
 'max_overlays': 5,
 'overlay_weights': [0.71986223,
  0.21010333,
  0.06314581,
  0.00574053,
  0.00114811],
 'overlay_snr_dbs': [20, 10, 3],
 'apply_filter': 0,
 'use_neptune': False,
 'log_batch_metrics': False,
 'sr': 32000,
 'n_fft': 3200,
 'window_length': 3200,
 'n_mels': 128,
 'hop_length': 800,
 'f_min': 150,
 'f_max': 15000,
 'f1_threshold': 0.5,
 'lr': 0.001,
 'n_epochs': 1000,
 'model': 'resnest50',
 'optimizer': 'torch.optim.Adam',
 'loss_fn': 'torch.nn.BCEWithLogitsLoss',
 'scheduler': 'torch.optim.lr_scheduler.CosineAnnealingLR',
 'labels': ['bobfly1',
  'brnjay',
  'bucmot2',
  

In [7]:
# Inspect the training-set size exposed by the config
# (presumably the number of training items per epoch; confirm in lark.config).
cfg.training_dataset_size

2000

In [10]:
# Assemble the network: a Sig2Spec front end followed by a partially fine-tuned
# ResNeSt backbone with a new classification head.
prep = Sig2Spec(cfg)

# Load the backbone named in the config rather than a hard-coded string, so the
# architecture that is trained always matches what cfg.model records for the run.
main_model = torch.hub.load('zhanghang1989/ResNeSt', cfg.model, pretrained=True)

# Freeze every pretrained weight first ...
for param in main_model.parameters():
    param.requires_grad = False

# ... then re-enable gradients for the last two residual stages.
# NOTE(review): avgpool is listed as well, but a pooling layer normally holds no
# parameters, so that entry is probably a no-op; confirm before relying on it.
for layer in [main_model.layer3, main_model.layer4, main_model.avgpool]:
    for param in layer.parameters():
        param.requires_grad = True

# Read the head's input width from the pretrained classifier instead of hard-coding
# it, so the head still fits if cfg.model selects a backbone with a different width.
head_in_features = main_model.fc.in_features

# New classification head with one output logit per label. Its parameters are
# created after the freeze loop above, so they are trainable by default.
posp = torch.nn.Sequential(
    torch.nn.Linear(in_features=head_in_features, out_features=1024, bias=True),
    torch.nn.Dropout(p=0.2),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=1024, out_features=512, bias=True),
    torch.nn.Dropout(p=0.2),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=512, out_features=len(cfg.labels), bias=True),
)
main_model.fc = posp

# Chain the front end and the backbone into one module and move it to the GPU.
model = torch.nn.Sequential(prep, main_model)
model = model.cuda()


Using cache found in /home/koen/.cache/torch/hub/zhanghang1989_ResNeSt_master


In [11]:
# Display the module tree to check the assembled architecture.
model

Sequential(
  (0): Sig2Spec(
    (melspec): MelSpectrogram(
      (spectrogram): Spectrogram()
      (mel_scale): MelScale()
    )
    (p2db): AmplitudeToDB()
    (tf_norm): Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  )
  (1): ResNet(
    (conv1): Sequential(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mod

In [33]:
# Create the Learner for this run from the run name, the config and the model.
# The identifier it prints is the run name with a timestamp appended.
lrn = Learner("resnest50-vanilla", cfg, model)

resnest50-vanilla-20210514-085931


In [34]:
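# Optional smoke test, left disabled: take one batch from lrn.tdl and run it
# through the model on the GPU to check that the forward pass works.
# x, y = next(iter(lrn.tdl))
# model(x.cuda())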

In [35]:
# Start training. Each epoch logs train/valid loss and F1; interrupt the kernel to stop early.
lrn.learn()

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:00:04 epoch:   1 train loss: 0.212912 train f1: 0.040167 valid loss: 0.093367 valid f1: 0.009569


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:00:39 epoch:   2 train loss: 0.153560 train f1: 0.224635 valid loss: 0.110767 valid f1: 0.011719


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:01:13 epoch:   3 train loss: 0.120152 train f1: 0.456885 valid loss: 0.113290 valid f1: 0.039604


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:01:47 epoch:   4 train loss: 0.092660 train f1: 0.616063 valid loss: 0.116293 valid f1: 0.164103


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:02:21 epoch:   5 train loss: 0.070839 train f1: 0.721548 valid loss: 0.116533 valid f1: 0.130584


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:02:54 epoch:   6 train loss: 0.063352 train f1: 0.770962 valid loss: 0.096964 valid f1: 0.223496


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:03:28 epoch:   7 train loss: 0.050210 train f1: 0.831651 valid loss: 0.099510 valid f1: 0.257718


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:04:02 epoch:   8 train loss: 0.040227 train f1: 0.865359 valid loss: 0.088334 valid f1: 0.421622


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:04:37 epoch:   9 train loss: 0.035739 train f1: 0.878049 valid loss: 0.084286 valid f1: 0.360610


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:05:11 epoch:  10 train loss: 0.029482 train f1: 0.901847 valid loss: 0.084936 valid f1: 0.352318


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:05:45 epoch:  11 train loss: 0.031449 train f1: 0.898888 valid loss: 0.085585 valid f1: 0.380336


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:06:19 epoch:  12 train loss: 0.028828 train f1: 0.907363 valid loss: 0.084517 valid f1: 0.374843


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:06:53 epoch:  13 train loss: 0.032612 train f1: 0.896461 valid loss: 0.087865 valid f1: 0.335553


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:07:27 epoch:  14 train loss: 0.031524 train f1: 0.893999 valid loss: 0.091704 valid f1: 0.379828


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:08:01 epoch:  15 train loss: 0.034175 train f1: 0.883893 valid loss: 0.084064 valid f1: 0.330645


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:08:35 epoch:  16 train loss: 0.048392 train f1: 0.839764 valid loss: 0.093506 valid f1: 0.248416


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:09:09 epoch:  17 train loss: 0.058498 train f1: 0.790800 valid loss: 0.115178 valid f1: 0.096089


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:09:43 epoch:  18 train loss: 0.050440 train f1: 0.823784 valid loss: 0.111885 valid f1: 0.090791


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:10:17 epoch:  19 train loss: 0.056293 train f1: 0.812517 valid loss: 0.156923 valid f1: 0.084481


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:10:51 epoch:  20 train loss: 0.063391 train f1: 0.778331 valid loss: 0.102797 valid f1: 0.255814


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:11:24 epoch:  21 train loss: 0.056267 train f1: 0.806627 valid loss: 0.099736 valid f1: 0.132414


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:11:58 epoch:  22 train loss: 0.049566 train f1: 0.835272 valid loss: 0.109509 valid f1: 0.124118


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:12:32 epoch:  23 train loss: 0.043714 train f1: 0.853113 valid loss: 0.103532 valid f1: 0.379157


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:13:06 epoch:  24 train loss: 0.033654 train f1: 0.887833 valid loss: 0.094883 valid f1: 0.398929


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 09:13:40 epoch:  25 train loss: 0.031666 train f1: 0.895218 valid loss: 0.098697 valid f1: 0.307102


  0%|          | 0/63 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Evaluate the learner's current model (this cell has no recorded execution or output).
lrn.evaluate()

In [26]:
# Restore the checkpoint tagged 'best'; the call returns that checkpoint's epoch
# and validation loss / score, which are shown as the cell output.
lrn.load_checkpoint('best')

{'epoch': 10,
 'valid_loss': 0.07955082545154973,
 'valid_score': 0.4748743772506714}

In [27]:
# Evaluate the current model; the output tabulates tp / tn / fp / fn and F1
# for decision thresholds from 0.0 to 0.9.
lrn.evaluate()

  0%|          | 0/38 [00:00<?, ?it/s]

Unnamed: 0,thresh,tp,tn,fp,fn,f1
0,0.0,413,0,23587,0,0.033834
1,0.1,293,21587,2000,120,0.216556
2,0.2,264,22749,838,149,0.348515
3,0.3,233,23147,440,180,0.429098
4,0.4,213,23313,274,200,0.473333
5,0.5,189,23393,194,224,0.474874
6,0.6,172,23442,145,241,0.471233
7,0.7,153,23484,103,260,0.457399
8,0.8,106,23511,76,307,0.356303
9,0.9,62,23554,33,351,0.244094


In [28]:
# Run training again on the same learner; the epoch counter in the log restarts at 1.
# NOTE(review): presumably this continues from whatever weights the learner currently holds; confirm in lark.learner.
lrn.learn()

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:52:08 epoch:   1 train loss: 0.031479 train f1: 0.896259 valid loss: 0.077326 valid f1: 0.483029


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:52:43 epoch:   2 train loss: 0.028263 train f1: 0.903739 valid loss: 0.081404 valid f1: 0.468599


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:53:17 epoch:   3 train loss: 0.025389 train f1: 0.918196 valid loss: 0.089747 valid f1: 0.415948


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:53:51 epoch:   4 train loss: 0.030902 train f1: 0.896970 valid loss: 0.109101 valid f1: 0.171745


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:54:25 epoch:   5 train loss: 0.036986 train f1: 0.872708 valid loss: 0.137183 valid f1: 0.214090


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:54:59 epoch:   6 train loss: 0.043497 train f1: 0.856836 valid loss: 0.112311 valid f1: 0.221870


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:55:34 epoch:   7 train loss: 0.056680 train f1: 0.803723 valid loss: 0.090113 valid f1: 0.410714


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:56:08 epoch:   8 train loss: 0.059713 train f1: 0.793339 valid loss: 0.131200 valid f1: 0.166344


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:56:42 epoch:   9 train loss: 0.056574 train f1: 0.801103 valid loss: 0.107602 valid f1: 0.258940


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:57:16 epoch:  10 train loss: 0.050020 train f1: 0.834722 valid loss: 0.119466 valid f1: 0.231146


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:57:50 epoch:  11 train loss: 0.049440 train f1: 0.838554 valid loss: 0.095878 valid f1: 0.323680


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:58:24 epoch:  12 train loss: 0.048194 train f1: 0.845389 valid loss: 0.084813 valid f1: 0.341304


  0%|          | 0/63 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

2021-05-14 08:58:59 epoch:  13 train loss: 0.040860 train f1: 0.856687 valid loss: 0.092160 valid f1: 0.304516


  0%|          | 0/63 [00:00<?, ?it/s]

KeyboardInterrupt: 