<a href="https://colab.research.google.com/github/benayas1/ALV_Framework/blob/master/cornell_birds_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training BirdSongs

In [4]:
# Import apex for mixed precision training
#!cd /kaggle/input/apexpytorch/ && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ >> /dev/null

#!wget https://github.com/benayas1/benatools/archive/master.zip -P bena
#!unzip bena/master.zip

!pip install benatools >> /dev/null

# Install resnest
#!pip install ../input/resnest50-fast-package/resnest-0.0.6b20200701/resnest/ >> /dev/null
!pip install soundfile

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pathlib import Path
import typing as tp
import cv2
import librosa
import random
import audioread
import soundfile as sf
import os
import time as time
import matplotlib.pyplot as plt

import benatools as bena
from benatools.tools import MultiStratifiedKFold
from benatools.torch.fitter import TorchFitter
from benatools.torch.efficient_net import create_efn


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

!pip install gcsfs
import gcsfs
from google.colab import auth
from google.cloud import storage


#from apex import amp

#import resnest.torch as resnest_torch

from sklearn.metrics import f1_score
from tqdm import tqdm
import glob

COLAB = True

if COLAB:
    os.environ.setdefault("GCLOUD_PROJECT", "omega-cosmos-116215")
    GS_BUCKET = 'benayas_kaggle'
    !echo "deb http://packages.cloud.google.com/apt gcsfuse-`lsb_release -c -s` main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
    !curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
    !sudo apt-get -y -q update
    !sudo apt-get -y -q install gcsfuse
    auth.authenticate_user()
    !mkdir -p data
    !gcsfuse --implicit-dirs --limit-bytes-per-sec -1 --limit-ops-per-sec -1 {GS_BUCKET} data


deb http://packages.cloud.google.com/apt gcsfuse-bionic main
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   653  100   653    0     0  19787      0 --:--:-- --:--:-- --:--:-- 19787
OK
Hit:1 http://packages.cloud.google.com/apt gcsfuse-bionic InRelease
Hit:2 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/ InRelease
Ign:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Hit:4 http://security.ubuntu.com/ubuntu bionic-security InRelease
Ign:5 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Hit:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  Release
Hit:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Hit:8 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu bionic InRelease
Hit:10 http://archive.ubu

In [5]:
# Seed
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
seed_everything(42)

# Read Data
Read data from resampled datasets

In [None]:
%time

def get_df_kaggle(TRAIN_RESAMPLED_AUDIO_DIRS, train_df):
    print('Data will be loaded from Kaggle Platform')
    tmp_list = []
    for audio_d in TRAIN_RESAMPLED_AUDIO_DIRS:
        if not audio_d.exists():
            continue
        for ebird_d in audio_d.iterdir():
            if ebird_d.is_file():
                continue
            for wav_f in ebird_d.iterdir():
                tmp_list.append([ebird_d.name, wav_f.name, wav_f.as_posix()])
                
    train_wav_path_exist = pd.DataFrame(tmp_list, columns=["ebird_code", "resampled_filename", "file_path"])
    del tmp_list
    train_all = pd.merge(train, train_wav_path_exist, on=["ebird_code", "resampled_filename"], how="inner")

    print(train.shape)
    print(train_wav_path_exist.shape)
    print(train_all.shape)
    return train_all

def get_df_colab(TRAIN_RESAMPLED_AUDIO_DIRS, train_df, client):
    print('Data will be loaded from GCS')
    tmp_list = []
    for audio_d in TRAIN_RESAMPLED_AUDIO_DIRS:
        for f in glob.iglob(audio_d+'/*'):
            if f[-4:] == '.csv':
                continue
            for wav_f in glob.iglob(f+'/*.wav'):
                tmp_list.append( [f[f.rfind('/')+1:], wav_f[wav_f.rfind('/')+1:], wav_f] )

    train_wav_path_exist = pd.DataFrame(tmp_list, columns=["ebird_code", "resampled_filename", "file_path"])
    del tmp_list
    train_all = pd.merge(train, train_wav_path_exist, on=["ebird_code", "resampled_filename"], how="inner")

    print(train.shape)
    print(train_wav_path_exist.shape)
    print(train_all.shape)
    return train_all

if COLAB:
    INPUT_ROOT  = "/content/data/birdsong-recognition/"
    TRAIN_RESAMPLED_AUDIO_DIRS = [
      INPUT_ROOT + "birdsong-resampled-train-audio-{:0>2}".format(i)  for i in range(5)
    ]
    train = pd.read_csv(TRAIN_RESAMPLED_AUDIO_DIRS[0] + "/train_mod.csv")
    storage_client = storage.Client()
    train_all = get_df_colab(TRAIN_RESAMPLED_AUDIO_DIRS, train, storage_client)
else:
    ROOT = Path.cwd().parent
    INPUT_ROOT = ROOT / "input"
    RAW_DATA = INPUT_ROOT / "birdsong-recognition"
    TRAIN_AUDIO_DIR = RAW_DATA / "train_audio"
    TRAIN_RESAMPLED_AUDIO_DIRS = [
      INPUT_ROOT / "birdsong-resampled-train-audio-{:0>2}".format(i)  for i in range(5)
    ]
    TEST_AUDIO_DIR = RAW_DATA / "test_audio"

    train = pd.read_csv(TRAIN_RESAMPLED_AUDIO_DIRS[0] / "train_mod.csv")
    train_all = get_df_kaggle(TRAIN_RESAMPLED_AUDIO_DIRS, train)

train_all

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 7.15 µs
Data will be loaded from GCS


In [None]:
# Coding the bird names

BIRD_CODE = {
    'aldfly': 0, 'ameavo': 1, 'amebit': 2, 'amecro': 3, 'amegfi': 4,
    'amekes': 5, 'amepip': 6, 'amered': 7, 'amerob': 8, 'amewig': 9,
    'amewoo': 10, 'amtspa': 11, 'annhum': 12, 'astfly': 13, 'baisan': 14,
    'baleag': 15, 'balori': 16, 'banswa': 17, 'barswa': 18, 'bawwar': 19,
    'belkin1': 20, 'belspa2': 21, 'bewwre': 22, 'bkbcuc': 23, 'bkbmag1': 24,
    'bkbwar': 25, 'bkcchi': 26, 'bkchum': 27, 'bkhgro': 28, 'bkpwar': 29,
    'bktspa': 30, 'blkpho': 31, 'blugrb1': 32, 'blujay': 33, 'bnhcow': 34,
    'boboli': 35, 'bongul': 36, 'brdowl': 37, 'brebla': 38, 'brespa': 39,
    'brncre': 40, 'brnthr': 41, 'brthum': 42, 'brwhaw': 43, 'btbwar': 44,
    'btnwar': 45, 'btywar': 46, 'buffle': 47, 'buggna': 48, 'buhvir': 49,
    'bulori': 50, 'bushti': 51, 'buwtea': 52, 'buwwar': 53, 'cacwre': 54,
    'calgul': 55, 'calqua': 56, 'camwar': 57, 'cangoo': 58, 'canwar': 59,
    'canwre': 60, 'carwre': 61, 'casfin': 62, 'caster1': 63, 'casvir': 64,
    'cedwax': 65, 'chispa': 66, 'chiswi': 67, 'chswar': 68, 'chukar': 69,
    'clanut': 70, 'cliswa': 71, 'comgol': 72, 'comgra': 73, 'comloo': 74,
    'commer': 75, 'comnig': 76, 'comrav': 77, 'comred': 78, 'comter': 79,
    'comyel': 80, 'coohaw': 81, 'coshum': 82, 'cowscj1': 83, 'daejun': 84,
    'doccor': 85, 'dowwoo': 86, 'dusfly': 87, 'eargre': 88, 'easblu': 89,
    'easkin': 90, 'easmea': 91, 'easpho': 92, 'eastow': 93, 'eawpew': 94,
    'eucdov': 95, 'eursta': 96, 'evegro': 97, 'fiespa': 98, 'fiscro': 99,
    'foxspa': 100, 'gadwal': 101, 'gcrfin': 102, 'gnttow': 103, 'gnwtea': 104,
    'gockin': 105, 'gocspa': 106, 'goleag': 107, 'grbher3': 108, 'grcfly': 109,
    'greegr': 110, 'greroa': 111, 'greyel': 112, 'grhowl': 113, 'grnher': 114,
    'grtgra': 115, 'grycat': 116, 'gryfly': 117, 'haiwoo': 118, 'hamfly': 119,
    'hergul': 120, 'herthr': 121, 'hoomer': 122, 'hoowar': 123, 'horgre': 124,
    'horlar': 125, 'houfin': 126, 'houspa': 127, 'houwre': 128, 'indbun': 129,
    'juntit1': 130, 'killde': 131, 'labwoo': 132, 'larspa': 133, 'lazbun': 134,
    'leabit': 135, 'leafly': 136, 'leasan': 137, 'lecthr': 138, 'lesgol': 139,
    'lesnig': 140, 'lesyel': 141, 'lewwoo': 142, 'linspa': 143, 'lobcur': 144,
    'lobdow': 145, 'logshr': 146, 'lotduc': 147, 'louwat': 148, 'macwar': 149,
    'magwar': 150, 'mallar3': 151, 'marwre': 152, 'merlin': 153, 'moublu': 154,
    'mouchi': 155, 'moudov': 156, 'norcar': 157, 'norfli': 158, 'norhar2': 159,
    'normoc': 160, 'norpar': 161, 'norpin': 162, 'norsho': 163, 'norwat': 164,
    'nrwswa': 165, 'nutwoo': 166, 'olsfly': 167, 'orcwar': 168, 'osprey': 169,
    'ovenbi1': 170, 'palwar': 171, 'pasfly': 172, 'pecsan': 173, 'perfal': 174,
    'phaino': 175, 'pibgre': 176, 'pilwoo': 177, 'pingro': 178, 'pinjay': 179,
    'pinsis': 180, 'pinwar': 181, 'plsvir': 182, 'prawar': 183, 'purfin': 184,
    'pygnut': 185, 'rebmer': 186, 'rebnut': 187, 'rebsap': 188, 'rebwoo': 189,
    'redcro': 190, 'redhea': 191, 'reevir1': 192, 'renpha': 193, 'reshaw': 194,
    'rethaw': 195, 'rewbla': 196, 'ribgul': 197, 'rinduc': 198, 'robgro': 199,
    'rocpig': 200, 'rocwre': 201, 'rthhum': 202, 'ruckin': 203, 'rudduc': 204,
    'rufgro': 205, 'rufhum': 206, 'rusbla': 207, 'sagspa1': 208, 'sagthr': 209,
    'savspa': 210, 'saypho': 211, 'scatan': 212, 'scoori': 213, 'semplo': 214,
    'semsan': 215, 'sheowl': 216, 'shshaw': 217, 'snobun': 218, 'snogoo': 219,
    'solsan': 220, 'sonspa': 221, 'sora': 222, 'sposan': 223, 'spotow': 224,
    'stejay': 225, 'swahaw': 226, 'swaspa': 227, 'swathr': 228, 'treswa': 229,
    'truswa': 230, 'tuftit': 231, 'tunswa': 232, 'veery': 233, 'vesspa': 234,
    'vigswa': 235, 'warvir': 236, 'wesblu': 237, 'wesgre': 238, 'weskin': 239,
    'wesmea': 240, 'wessan': 241, 'westan': 242, 'wewpew': 243, 'whbnut': 244,
    'whcspa': 245, 'whfibi': 246, 'whtspa': 247, 'whtswi': 248, 'wilfly': 249,
    'wilsni1': 250, 'wiltur': 251, 'winwre3': 252, 'wlswar': 253, 'wooduc': 254,
    'wooscj2': 255, 'woothr': 256, 'y00475': 257, 'yebfly': 258, 'yebsap': 259,
    'yehbla': 260, 'yelwar': 261, 'yerwar': 262, 'yetvir': 263
}

INV_BIRD_CODE = {v: k for k, v in BIRD_CODE.items()}

# Dataset and DataLoader

In [None]:
PERIOD = 5

def mono_to_color(
    X: np.ndarray, mean=None, std=None,
    norm_max=None, norm_min=None, eps=1e-6
):
    # Stack X as [X,X,X]
    X = np.stack([X, X, X], axis=-1)

    # Standardize
    mean = mean or X.mean()
    X = X - mean
    std = std or X.std()
    Xstd = X / (std + eps)
    _min, _max = Xstd.min(), Xstd.max()
    norm_max = norm_max or _max
    norm_min = norm_min or _min
    if (_max - _min) > eps:
        # Normalize to [0, 255]
        V = Xstd
        V[V < norm_min] = norm_min
        V[V > norm_max] = norm_max
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # Just zero
        V = np.zeros_like(Xstd, dtype=np.uint8)
    return V

class SpectrogramDataset(data.Dataset):
    def __init__(
        self,
        file_list: tp.List[tp.List[str]], 
        img_size=224,
        waveform_transforms=None, 
        spectrogram_transforms=None, 
        melspectrogram_parameters={},
        one_hot_label=True
    ):
        self.file_list = file_list  # list of list: [file_path, ebird_code]
        self.img_size = img_size
        self.waveform_transforms = waveform_transforms
        self.spectrogram_transforms = spectrogram_transforms
        self.melspectrogram_parameters = melspectrogram_parameters
        self.one_hot_label = one_hot_label
        self.times = {'read':0.0, 'waveform':0.0, 'melspec':0.0, 'spectogram':0.0, 'image':0.0, 'label':0.0}

    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self, idx: int):
        wav_path, ebird_code = self.file_list[idx]

        t = time.time()
        y, sr = sf.read(wav_path)
        self.times['read'] = self.times['read'] + time.time() - t; t = time.time()

        if self.waveform_transforms:
            y = self.waveform_transforms(y)
        else:
            len_y = len(y)
            effective_length = sr * PERIOD
            if len_y < effective_length:
                new_y = np.zeros(effective_length, dtype=y.dtype)
                start = np.random.randint(effective_length - len_y)
                new_y[start:start + len_y] = y
                y = new_y.astype(np.float32)
            elif len_y > effective_length:
                start = np.random.randint(len_y - effective_length)
                y = y[start:start + effective_length].astype(np.float32)
            else:
                y = y.astype(np.float32)
        self.times['waveform'] = self.times['waveform'] + time.time() - t; t = time.time()

        melspec = librosa.feature.melspectrogram(y, sr=sr, **self.melspectrogram_parameters)
        melspec = librosa.power_to_db(melspec).astype(np.float32)
        self.times['melspec'] = self.times['melspec'] + time.time() - t; t = time.time()

        if self.spectrogram_transforms:
            melspec = self.spectrogram_transforms(melspec)
        else:
            pass
        self.times['spectogram'] = self.times['spectogram'] + t - time.time(); t = time.time()
        
        image = mono_to_color(melspec)
        height, width, _ = image.shape
        image = cv2.resize(image, (int(width * self.img_size / height), self.img_size))
        image = np.moveaxis(image, 2, 0)
        image = (image / 255.0).astype(np.float32)
        self.times['image'] = self.times['image'] + time.time() - t; t = time.time()

        # Labels in One Hot format
        if self.one_hot_label:
            labels = np.zeros(len(BIRD_CODE), dtype="f")
            labels[BIRD_CODE[ebird_code]] = 1
        else:  # Labels in integer format
            labels = BIRD_CODE[ebird_code]
        self.times['label'] = self.times['label'] + time.time() - t; t = time.time()

        return image, labels
    

def get_loaders_for_training(train_file_list: tp.List[str], val_file_list: tp.List[str], img_size=224, melspectogram_params={}, bs_train=50, bs_val=100, loss=None):
    """Function to return dataloaders"""
    # CrossEntropyLoss requires integer format for the classes
    one_hot_format = loss != torch.nn.CrossEntropyLoss
    
    # # make dataset
    train_dataset = SpectrogramDataset(train_file_list, img_size=img_size, melspectrogram_parameters=melspectogram_params, one_hot_label=one_hot_format)
    val_dataset   = SpectrogramDataset(val_file_list, img_size=img_size, melspectrogram_parameters=melspectogram_params, one_hot_label=one_hot_format)
    # # make dataloader
    train_loader = data.DataLoader(train_dataset, shuffle=True,  num_workers=6, drop_last=True,  pin_memory=True, batch_size=bs_train)
    val_loader   = data.DataLoader(val_dataset,   shuffle=False, num_workers=6, drop_last=False, pin_memory=True, batch_size=bs_val)
    
    return train_loader, val_loader

# Experiment Parameters

In [None]:
N_EXPERIMENTS = 1  # Normally not more than one run per commit
FOLD = 0 # Each run should cover a single fold

# DATASET PARAMS
IMG_SIZE = [224] * N_EXPERIMENTS
MELSPECTOGRAM_PARAMS = [{'n_mels':128, 'fmin':20, 'fmax':16000}] * N_EXPERIMENTS

# DATALOADER PARAMS
BS_TRAIN = [64]
BS_VAL = [64]

# MODEL PARAMS
MODEL = [0]

# LOSS FUNCTION
LOSS = [torch.nn.CrossEntropyLoss]
        #torch.nn.BCEWithLogitsLoss

# LR SCHEDULER
LR = [torch.optim.lr_scheduler.ReduceLROnPlateau] * N_EXPERIMENTS
#LR = [torch.optim.lr_scheduler.CosineAnnealingLR] * N_EXPERIMENTS
LR_PARAMS = [dict( mode='min',
                   factor=0.5,
                   patience=1,
                   verbose=False, 
                   threshold=0.0001,
                   threshold_mode='abs',
                   cooldown=0, 
                   min_lr=1e-8,
                   eps=1e-08
                )] * N_EXPERIMENTS
#LR_PARAMS = [dict(T_max=10)] * N_EXPERIMENTS

# HALF PRECISION
HP = [False] * N_EXPERIMENTS

# GLOBAL PARAMETERS
EPOCHS=45
N_CLASSES = 264
DISPLAY_PLOT=True

# Models

In [None]:

# Function to be called
def get_model(model, n_classes):
    if model == 0:
        return get_model0( n_classes)
    
    if model == 1:
        return get_model1( n_classes)


# Model based on efficient net B0
def get_model0(n_classes):
    model = create_efn(0)
    #del model.fc
    # # use the same head as the baseline notebook.
    n_features = model.classifier.in_features
    
    model.classifier = nn.Sequential(
        nn.Linear(n_features, 512), nn.ReLU(), nn.Dropout(p=0.2),
        nn.Linear(512, 512), nn.ReLU(), nn.Dropout(p=0.2),
        nn.Linear(512, n_classes))
    
    return model

# Model based on resnest
def get_model1(n_classes):
    model = resnest_torch.resnest50_fast_1s1x64d(pretrained=True)
    del model.fc
    # # use the same head as the baseline notebook.
    model.fc = nn.Sequential(
        nn.Linear(2048, 1024), nn.ReLU(), nn.Dropout(p=0.2),
        nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(p=0.2),
        nn.Linear(1024, n_classes))
    
    return model


# Training

In [None]:
# Split train/val, taking fold 0 for experiments
cv = MultiStratifiedKFold(5, train_all, ['ebird_code'], seed=42)
train_idx, val_idx = cv.get_indices(fold=FOLD)
train_files = train_all.iloc[train_idx][['file_path','ebird_code']].values.tolist()
val_files = train_all.iloc[val_idx][['file_path','ebird_code']].values.tolist()
print(f'Train set contains {len(train_files)} samples, Val set contains {len(val_files)}')

In [None]:
for i in range(N_EXPERIMENTS):
    print(f'********** EXPERIMENT {i} **********')
    print(f'***** img size {IMG_SIZE[i]} *****')
    print(f'***** bs train {BS_TRAIN[i]} *****')
    print(f'***** bs val {BS_VAL[i]} *****')
    print(f'***** model {MODEL[i]} *****')
    print(f'***** scheduler class {LR[i]} *****')
    print(f'***** loss class {LOSS[i]} *****')
    print(f'***** half precission {HP[i]} *****')
    print(f'**********************************\n')

    # Create Data Loaders
    train_loader, val_loader = get_loaders_for_training(train_file_list = train_files,
                                                        val_file_list = val_files,
                                                        img_size = IMG_SIZE[i],
                                                        melspectogram_params = MELSPECTOGRAM_PARAMS[i],
                                                        bs_train = BS_TRAIN[i],
                                                        bs_val = BS_VAL[i],
                                                        loss = LOSS[i]
                                                        )
    print(f'Training on  {len(train_loader)} batches, validating on {len(val_loader)} batches')
    
    # Load Model
    device = torch.device('cuda:0')
    model = get_model(MODEL[i], N_CLASSES)
    model.to(device)
    
    # Create fitter object
    if HP[i]:
        fitter = TorchFitterHP(model, device, loss=LOSS[i](), n_epochs=EPOCHS, lr=0.001, scheduler_class = LR[i], scheduler_params = LR_PARAMS[i], verbose=10, early_stopping=5)
    else:
        fitter = TorchFitter(model, device, loss=LOSS[i](), n_epochs=EPOCHS, lr=0.001, scheduler_class = LR[i], scheduler_params = LR_PARAMS[i], verbose=10, early_stopping=5)
    history = fitter.fit(train_loader, val_loader)
    
    # Calculate score on validation set
    model = fitter.model
    model.eval()
    
    labels = []
    outputs = []
    with torch.no_grad():
        for img, ls in tqdm(val_loader):
            outputs += np.argmax(model(img.to(device)).cpu().numpy(), axis=1).tolist()
            labels +=  np.argmax(ls.numpy(), axis=1).tolist() if len(ls.shape) > 1 else ls.numpy().tolist()

    oof_score = f1_score(y_true=labels, y_pred=outputs, average='micro')
    
    print(f'********** OOF F1 MICRO: {oof_score} **********')
    
    # PLOT TRAINING
    if DISPLAY_PLOT:
        plt.figure(figsize=(15,5))
        plt.plot(np.arange(len(history)), history['train'],'-o',label='Train Loss',color='#ff7f0e')
        plt.plot(np.arange(len(history)), history['val'],'-o',label='Val Loss',color='#1f77b4')
        x = np.argmin( history['val'] ); y = np.min( history['val'] )
        xdist = plt.xlim()[1] - plt.xlim()[0]; ydist = plt.ylim()[1] - plt.ylim()[0]
        plt.text(x-0.03*xdist,y-0.13*ydist,'min loss\n%.2f'%y,size=14)
        plt.ylabel('Loss',size=14); plt.xlabel('Epoch',size=14)
        plt.legend(loc=2)
        
        plt2 = plt.gca().twinx()
        plt2.plot(np.arange(len(history)),history['lr'],'-o',label='LR',color='#2ca02c')
        plt.ylabel('LR',size=14)
        
        plt.title('Experiment %i'%i,size=18)
        plt.legend(loc=3)
        plt.show()
    
    print('\n')
    