![BirdCLEF 2023 competition header](https://storage.googleapis.com/kaggle-competitions/kaggle/25954/logos/header.png?t=2021-03-19-18-32-57)

In [1]:
# Install the third-party packages imported further down: timm (image models)
# and an upgraded wandb (experiment tracking).
!pip install timm
!pip install --upgrade wandb

Collecting wandb
  Downloading wandb-0.14.0-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: wandb
  Attempting uninstall: wandb
    Found existing installation: wandb 0.13.10
    Uninstalling wandb-0.13.10:
      Successfully uninstalled wandb-0.13.10
Successfully installed wandb-0.14.0
[0m

In [2]:
import os
import gc
import cv2
import math
import copy
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Audio 
import torchaudio
from torchaudio.transforms import MelSpectrogram, Resample

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import average_precision_score

# For Image Models
import timm

# For colored terminal text
# b_ / sr_ are short aliases used by run_training to colour its
# "Validation CMAP Improved" message and reset the colour afterwards.
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

# Silences every warning for the rest of the notebook.
import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
# NOTE(review): this presumably forces synchronous CUDA kernel launches, which
# is a debugging aid and may slow training - confirm before long runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [3]:
import wandb

# Best-effort login to Weights & Biases using the Kaggle secret "wandb_api".
# `anony` records the outcome: None when the login call went through, "must"
# when the secret (or the kaggle_secrets module itself) is unavailable.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("wandb_api")
    wandb.login(key=api_key)
    anony = None
except Exception:
    # `except Exception` instead of a bare `except:` keeps the fallback
    # behaviour but no longer swallows KeyboardInterrupt / SystemExit.
    anony = "must"
    print('If you want to use your W&B account, go to Add-ons -> Secrets and provide your W&B access token. Use the Label name as wandb_api. \nGet your W&B access token from here: https://wandb.ai/authorize')

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [4]:
# Central experiment configuration; every other cell reads its settings from here.
CONFIG = {
    # --- run / reproducibility ---
    "seed": 2023,
    "epochs": 20,
    # --- model ---
    "model_name": "efficientnet_b3",
    "embedding_size": 768,
    "num_classes": 264,
    # --- batching ---
    "train_batch_size": 16,
    "valid_batch_size": 16,
    # --- optimisation ---
    "learning_rate": 1e-3,
    "scheduler": 'CosineAnnealingLR',
    "min_lr": 1e-6,
    # T_max is handed to CosineAnnealingLR by fetch_scheduler; train_one_epoch
    # steps the scheduler after each optimizer step, so it is counted in
    # optimizer steps, not epochs.
    "T_max": 500,
    "weight_decay": 1e-6,
    # --- cross-validation / gradient accumulation ---
    "n_fold": 5,
    "n_accumulate": 1,
    # First CUDA device when available, CPU otherwise.
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    # --- W&B bookkeeping ---
    "competition": "BirdCLEF23",
    "_wandb_kernel": "deb",
    # --- audio specific ---
    "sample_rate": 32_000,
    "max_time": 5,
    "n_mels": 224,
    "n_fft": 1024,
}

In [5]:
def set_seed(seed=42):
    '''Seed every random number generator this notebook can draw from.

    Covers Python's `random`, NumPy, PyTorch (CPU and all CUDA devices) and the
    hash seed, and puts cuDNN into deterministic mode. This is for REPRODUCIBILITY.

    Args:
        seed: integer seed applied to all generators.
    '''
    # `random` is imported by the notebook, so it must be seeded as well.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
# Apply the seed stored in CONFIG before any data splitting or model creation.
set_seed(CONFIG['seed'])

In [6]:
# Kaggle input locations of the BirdCLEF 2023 dataset; the audio folders live
# directly under the dataset root.
ROOT_DIR = '../input/birdclef-2023'
TRAIN_DIR = f'{ROOT_DIR}/train_audio'
TEST_DIR = f'{ROOT_DIR}/test_soundscapes'

In [7]:
def get_train_file_path(filename):
    """Return the path of a training recording: TRAIN_DIR joined to `filename` with '/'."""
    return "{}/{}".format(TRAIN_DIR, filename)

In [8]:
# Read the training metadata and add a column holding the path of each recording.
df = pd.read_csv(f"{ROOT_DIR}/train_metadata.csv")
df['file_path'] = df['filename'].map(get_train_file_path)
df.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,url,filename,file_path
0,abethr1,[],['song'],4.3906,38.2788,Turdus tephronotus,African Bare-eyed Thrush,Rolf A. de By,Creative Commons Attribution-NonCommercial-Sha...,4.0,https://www.xeno-canto.org/128013,abethr1/XC128013.ogg,../input/birdclef-2023/train_audio/abethr1/XC1...
1,abethr1,[],['call'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363501,abethr1/XC363501.ogg,../input/birdclef-2023/train_audio/abethr1/XC3...
2,abethr1,[],['song'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,3.5,https://www.xeno-canto.org/363502,abethr1/XC363502.ogg,../input/birdclef-2023/train_audio/abethr1/XC3...
3,abethr1,[],['song'],-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,5.0,https://www.xeno-canto.org/363503,abethr1/XC363503.ogg,../input/birdclef-2023/train_audio/abethr1/XC3...
4,abethr1,[],"['call', 'song']",-2.9524,38.2921,Turdus tephronotus,African Bare-eyed Thrush,James Bradley,Creative Commons Attribution-NonCommercial-Sha...,4.5,https://www.xeno-canto.org/363504,abethr1/XC363504.ogg,../input/birdclef-2023/train_audio/abethr1/XC3...


In [9]:
# Fit the encoder on the species codes, then overwrite the column with integer ids.
all_labels = sorted(df['primary_label'].values)
encoder = LabelEncoder().fit(all_labels)
df['primary_label'] = encoder.transform(df['primary_label'])

# Persist the fitted encoder so the id <-> species mapping can be reloaded later.
with open("le.pkl", "wb") as encoder_file:
    joblib.dump(encoder, encoder_file)

In [10]:
# Stratify on the encoded label and tag every row with the fold in which it is
# used for validation.
skf = StratifiedKFold(n_splits=CONFIG['n_fold'])
fold_splits = skf.split(X=df, y=df.primary_label)

for fold, (_, valid_rows) in enumerate(fold_splits):
    df.loc[valid_rows, "kfold"] = fold

In [11]:
class BirdCLEFDataset(Dataset):
    """Dataset that turns each recording into a 3-channel mel-spectrogram "image".

    For every row the audio file is loaded, averaged over axis 0 (to mono),
    resampled to `target_sample_rate` when needed, cropped or zero-padded to
    exactly `target_sample_rate * max_time` samples, and converted to a mel
    spectrogram using CONFIG['n_mels'] and CONFIG['n_fft'].

    Args:
        df: frame with a 'file_path' column and an integer 'primary_label' column.
        target_sample_rate: sample rate every clip is brought to.
        max_time: clip length in seconds.
        image_transforms: stored on the instance; not applied anywhere in this class.
    """

    def __init__(self, df, target_sample_rate, max_time, image_transforms=None):
        self.file_paths = df['file_path'].values
        self.labels = df['primary_label'].values
        self.target_sample_rate = target_sample_rate
        self.num_samples = target_sample_rate * max_time
        self.image_transforms = image_transforms
        # Built once here rather than on every __getitem__ call; the transform
        # only depends on values that are fixed for the lifetime of the dataset.
        self.mel_spectrogram = MelSpectrogram(sample_rate=target_sample_rate,
                                              n_mels=CONFIG['n_mels'],
                                              n_fft=CONFIG['n_fft'])

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, index):
        """Return a dict with 'image', 'label' (class index) and 'label_onehot'."""
        filepath = self.file_paths[index]
        audio, sample_rate = torchaudio.load(filepath)
        audio = self.to_mono(audio)

        if sample_rate != self.target_sample_rate:
            audio = Resample(sample_rate, self.target_sample_rate)(audio)

        # Bring the waveform to exactly num_samples samples.
        if audio.shape[0] > self.num_samples:
            audio = self.crop_audio(audio)
        if audio.shape[0] < self.num_samples:
            audio = self.pad_audio(audio)

        mel = self.mel_spectrogram(audio)
        image = self._to_image(mel)

        class_index = self.labels[index]
        label_onehot = torch.zeros(CONFIG['num_classes'])
        label_onehot[class_index] = 1
        label = torch.tensor(class_index)

        return {
            "image": image,
            "label": label,
            "label_onehot": label_onehot
        }

    @staticmethod
    def _to_image(mel):
        """Stack `mel` three times along a new first axis and scale by its max magnitude.

        An all-zero spectrogram (e.g. a silent clip) is returned unscaled: dividing
        by its maximum of 0 would fill the image with NaNs.
        """
        image = torch.stack([mel, mel, mel])
        max_val = torch.abs(image).max()
        if max_val > 0:
            image = image / max_val
        return image

    def pad_audio(self, audio):
        """Zero-pad `audio` at the end of its last dimension up to num_samples."""
        pad_length = self.num_samples - audio.shape[0]
        return F.pad(audio, (0, pad_length))

    def crop_audio(self, audio):
        """Keep only the first num_samples samples."""
        return audio[:self.num_samples]

    def to_mono(self, audio):
        """Average over axis 0 (assumes the loader returns channels first - TODO confirm)."""
        return torch.mean(audio, axis=0)

In [12]:
class GeM(nn.Module):
    """Generalized-mean pooling over the last two dimensions with a learnable exponent `p`.

    Values are clamped to at least `eps`, raised to the power `p`, averaged
    over the full spatial window and finally raised to `1 / p`.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        # One-element parameter so the exponent is trained with the network.
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        # The pooling window spans the whole height x width of the input.
        window = (x.size(-2), x.size(-1))
        powered = x.clamp(min=eps).pow(p)
        pooled = F.avg_pool2d(powered, window)
        return pooled.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})"

In [13]:
class BirdCLEFModel(nn.Module):
    """timm backbone followed by GeM pooling, a linear embedding and a linear classifier.

    Args:
        model_name: architecture name passed to `timm.create_model`.
        embedding_size: width of the embedding layer placed before the classifier.
        pretrained: forwarded to `timm.create_model`.
        num_classes: number of output logits; defaults to CONFIG['num_classes']
            so existing calls keep their behaviour.
    """

    def __init__(self, model_name, embedding_size, pretrained=True, num_classes=None):
        super().__init__()
        if num_classes is None:
            num_classes = CONFIG['num_classes']

        self.model = timm.create_model(model_name, pretrained=pretrained)
        in_features = self.model.classifier.in_features
        # Replace the backbone's own pooling and classifier with no-ops so the
        # pooling / embedding / classification layers below take their place.
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.embedding = nn.Linear(in_features, embedding_size)
        self.fc = nn.Linear(embedding_size, num_classes)

    def forward(self, images):
        """Return class logits for a batch of images."""
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        embedding = self.embedding(pooled_features)
        return self.fc(embedding)
    
# Build the network from the CONFIG settings and move it to the configured device.
model = BirdCLEFModel(model_name=CONFIG['model_name'],
                      embedding_size=CONFIG['embedding_size'])
model = model.to(CONFIG['device'])

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b3_ra2-cf984f9c.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_ra2-cf984f9c.pth


In [14]:
def criterion(outputs, labels):
    """Mean cross-entropy between the logits `outputs` and `labels`.

    Called with class indices by train_one_epoch and with one-hot vectors by
    valid_one_epoch.
    """
    return F.cross_entropy(outputs, labels)

In [15]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """Train `model` for one pass over `dataloader` and return the mean training loss.

    Gradients are accumulated over CONFIG['n_accumulate'] batches before each
    optimizer step. The scheduler (if any) is stepped right after every
    optimizer step, i.e. per update and not per epoch.

    Args:
        model: network to train (switched to train mode here).
        optimizer: optimizer updating the model's parameters.
        scheduler: learning-rate scheduler, or None.
        dataloader: yields dicts with 'image' and 'label' (class index) entries.
        device: device the batches are moved to.
        epoch: epoch number, only shown in the progress bar.

    Returns:
        Sample-weighted mean of the unscaled per-batch loss (0.0 for an empty loader).
    """
    model.train()

    n_accumulate = CONFIG['n_accumulate']
    num_steps = len(dataloader)
    dataset_size = 0
    running_loss = 0.0
    epoch_loss = 0.0  # returned unchanged when the loader yields no batches

    # Start from clean gradients so nothing left over from a previous call leaks in.
    optimizer.zero_grad()

    bar = tqdm(enumerate(dataloader), total=num_steps)
    for step, data in bar:
        images = data['image'].to(device)
        labels = data['label'].to(device)

        batch_size = images.size(0)

        outputs = model(images)
        loss = criterion(outputs, labels)

        # Only the gradient contribution is scaled for accumulation; the
        # reported loss below stays unscaled so it is comparable across settings.
        (loss / n_accumulate).backward()

        # Also step on the final batch so a trailing, incomplete accumulation
        # group is applied instead of spilling into the next epoch.
        is_last_step = (step + 1) == num_steps
        if (step + 1) % n_accumulate == 0 or is_last_step:
            optimizer.step()

            # zero the parameter gradients
            optimizer.zero_grad()

            if scheduler is not None:
                scheduler.step()

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        bar.set_postfix(Epoch=epoch, Train_Loss=epoch_loss,
                        LR=optimizer.param_groups[0]['lr'])
    gc.collect()

    return epoch_loss

In [16]:
def padded_cmap(solution, submission, padding_factor=5):
    """Macro average precision after padding both frames with rows of ones.

    `padding_factor` rows consisting only of 1s are appended to `solution` and
    to `submission` before `average_precision_score(..., average='macro')` is
    evaluated on their values.

    Args:
        solution: DataFrame of ground-truth indicators, one column per class.
        submission: DataFrame of predicted scores with the same columns.
        padding_factor: number of all-ones rows appended to each frame.

    Returns:
        The macro-averaged average-precision score.
    """
    n_columns = len(solution.columns)
    padding = pd.DataFrame([[1] * n_columns for _ in range(padding_factor)])
    padding.columns = solution.columns

    padded_solution = pd.concat([solution, padding], ignore_index=True)
    padded_submission = pd.concat([submission, padding], ignore_index=True)

    return average_precision_score(
        padded_solution.values,
        padded_submission.values,
        average='macro',
    )

In [17]:
@torch.inference_mode()
def valid_one_epoch(model, dataloader, device, epoch, optimizer=None):
    """Evaluate `model` on `dataloader`; return the mean loss and the padded cMAP.

    Args:
        model: network to evaluate (switched to eval mode here).
        dataloader: yields dicts with 'image' and 'label_onehot' entries.
        device: device the batches are moved to.
        epoch: epoch number, only shown in the progress bar.
        optimizer: optional; when given, its current learning rate is shown in
            the progress bar. Previously the function silently read a
            module-level `optimizer`, which failed if none existed.

    Returns:
        (epoch_loss, val_cmap): sample-weighted mean loss and the `padded_cmap`
        score of the softmax predictions against the one-hot labels.
    """
    model.eval()

    dataset_size = 0
    running_loss = 0.0

    LABELS = []
    PREDS = []

    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in bar:
        images = data['image'].to(device)
        labels = data['label_onehot'].to(device)

        batch_size = images.size(0)

        outputs = model(images)
        loss = criterion(outputs, labels)

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        PREDS.append(F.softmax(outputs, dim=1).cpu().detach().numpy())
        LABELS.append(labels.cpu().detach().numpy())

        postfix = {"Epoch": epoch, "Valid_Loss": epoch_loss}
        if optimizer is not None:
            postfix["LR"] = optimizer.param_groups[0]['lr']
        bar.set_postfix(**postfix)

    # Score all batches at once.
    LABELS = np.vstack(LABELS)
    PREDS = np.vstack(PREDS)
    val_cmap = padded_cmap(pd.DataFrame(LABELS), pd.DataFrame(PREDS))
    gc.collect()

    return epoch_loss, val_cmap

In [18]:
def run_training(model, optimizer, scheduler, device, num_epochs):
    """Train for `num_epochs`, track the best validation cMAP and return the best model.

    Reads the module-level `train_loader` and `valid_loader`, and requires an
    active W&B run (`wandb.init` must have been called beforehand).

    Args:
        model: network to train.
        optimizer: optimizer passed on to train_one_epoch.
        scheduler: learning-rate scheduler or None, passed on to train_one_epoch.
        device: device used for both training and validation batches.
        num_epochs: number of epochs to run.

    Returns:
        (model, history): the model with the best-cMAP weights loaded, and a
        dict of per-epoch 'Train Loss', 'Valid Loss' and 'Valid CMAP' lists.
    """
    # To automatically log gradients
    wandb.watch(model, log_freq=100)

    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))

    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch_cmap = 0
    history = defaultdict(list)

    for epoch in range(1, num_epochs + 1):
        gc.collect()
        # Honour the `device` argument instead of re-reading CONFIG['device'].
        train_epoch_loss = train_one_epoch(model, optimizer, scheduler,
                                           dataloader=train_loader,
                                           device=device, epoch=epoch)

        val_epoch_loss, val_epoch_cmap = valid_one_epoch(model, valid_loader,
                                                         device=device,
                                                         epoch=epoch)

        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(val_epoch_loss)
        history['Valid CMAP'].append(val_epoch_cmap)

        # Log the metrics in ONE call so they share the same W&B step; separate
        # wandb.log calls would each advance the step counter.
        wandb.log({"Train Loss": train_epoch_loss,
                   "Valid Loss": val_epoch_loss,
                   "Valid CMAP": val_epoch_cmap})

        # deep copy the model
        if val_epoch_cmap >= best_epoch_cmap:
            print(f"{b_}Validation CMAP Improved ({best_epoch_cmap} ---> {val_epoch_cmap})")
            best_epoch_cmap = val_epoch_cmap
            # wandb.run is the active run, so no global named `run` is needed.
            wandb.run.summary["Best CMAP Score"] = best_epoch_cmap
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = "CMAP{:.4f}_epoch{:.0f}.bin".format(best_epoch_cmap, epoch)
            torch.save(model.state_dict(), PATH)
            # Save a model file from the current directory
            print(f"Model Saved{sr_}")

        print()

    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best CMAP: {:.4f}".format(best_epoch_cmap))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, history

In [19]:
def fetch_scheduler(optimizer):
    """Build the learning-rate scheduler selected by CONFIG['scheduler'].

    Supported values:
        'CosineAnnealingLR'           - uses CONFIG['T_max'] and CONFIG['min_lr'].
        'CosineAnnealingWarmRestarts' - uses CONFIG['T_0'] and CONFIG['min_lr'];
                                        'T_0' must be added to CONFIG first.
        None                          - no scheduler.

    Args:
        optimizer: optimizer whose learning rate is scheduled.

    Returns:
        The scheduler instance, or None when CONFIG['scheduler'] is None.

    Raises:
        ValueError: for any other scheduler name (previously this surfaced as
            an UnboundLocalError on the return statement).
    """
    name = CONFIG['scheduler']
    if name is None:
        return None

    if name == 'CosineAnnealingLR':
        return lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['T_max'],
                                              eta_min=CONFIG['min_lr'])
    if name == 'CosineAnnealingWarmRestarts':
        return lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CONFIG['T_0'],
                                                        eta_min=CONFIG['min_lr'])

    raise ValueError(f"Unsupported scheduler in CONFIG['scheduler']: {name!r}")

In [20]:
def prepare_loaders(df, fold):
    """Build the training and validation DataLoaders for one fold.

    Rows whose `kfold` equals `fold` form the validation set; all other rows
    form the training set. The training loader shuffles and drops the last
    incomplete batch, the validation loader does neither.

    Args:
        df: metadata frame with a `kfold` column.
        fold: fold number held out for validation.

    Returns:
        (train_loader, valid_loader)
    """
    train_rows = df[df.kfold != fold].reset_index(drop=True)
    valid_rows = df[df.kfold == fold].reset_index(drop=True)

    # Settings shared by both datasets / both loaders.
    audio_kwargs = {"target_sample_rate": CONFIG['sample_rate'],
                    "max_time": CONFIG['max_time']}
    loader_kwargs = {"num_workers": 2, "pin_memory": True}

    train_loader = DataLoader(BirdCLEFDataset(train_rows, **audio_kwargs),
                              batch_size=CONFIG['train_batch_size'],
                              shuffle=True, drop_last=True, **loader_kwargs)
    valid_loader = DataLoader(BirdCLEFDataset(valid_rows, **audio_kwargs),
                              batch_size=CONFIG['valid_batch_size'],
                              shuffle=False, **loader_kwargs)

    return train_loader, valid_loader

In [21]:
# Only fold 0 is used: it is held out for validation and the other folds train.
# run_training reads these two names as module-level globals.
train_loader, valid_loader = prepare_loaders(df, fold=0)

In [22]:
# Adam over every model parameter; the schedule is selected by CONFIG['scheduler'].
optimizer = optim.Adam(params=model.parameters(),
                       lr=CONFIG['learning_rate'],
                       weight_decay=CONFIG['weight_decay'])
scheduler = fetch_scheduler(optimizer)

In [23]:
# Start the W&B run. `anony` comes from the login cell: None after a successful
# login, "must" otherwise - so anonymous mode is only requested when no API key
# was available, instead of being hard-coded for every run.
run = wandb.init(project=CONFIG['competition'],
                 config=CONFIG,
                 job_type='Train',
                 name=CONFIG['model_name'],
                 tags=['gem-pooling', CONFIG['model_name']],
                 anonymous=anony)

[34m[1mwandb[0m: Currently logged in as: [33m404vector[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [24]:
# Train for CONFIG['epochs'] epochs; `model` comes back with the best-cMAP weights loaded.
# In the run recorded below the best validation cMAP (0.7692) is reached at epoch 9,
# while the training loss keeps falling and the validation loss rises afterwards.
# The logged LR also rises and falls between epochs: the scheduler is stepped per
# batch (847 per epoch) with T_max=500.
model, history = run_training(model, optimizer, scheduler,
                              device=CONFIG['device'],
                              num_epochs=CONFIG['epochs'])

[INFO] Using GPU: Tesla P100-PCIE-16GB



100%|██████████| 847/847 [13:08<00:00,  1.07it/s, Epoch=1, LR=0.000786, Train_Loss=3.94]
100%|██████████| 212/212 [02:38<00:00,  1.34it/s, Epoch=1, LR=0.000786, Valid_Loss=3.96]


[34mValidation CMAP Improved (0 ---> 0.5483836685200932)
Model Saved[0m



100%|██████████| 847/847 [12:47<00:00,  1.10it/s, Epoch=2, LR=0.000328, Train_Loss=2.85]
100%|██████████| 212/212 [02:31<00:00,  1.40it/s, Epoch=2, LR=0.000328, Valid_Loss=2.53]


[34mValidation CMAP Improved (0.5483836685200932 ---> 0.6814788944454856)
Model Saved[0m



100%|██████████| 847/847 [12:36<00:00,  1.12it/s, Epoch=3, LR=1.75e-5, Train_Loss=2.37]
100%|██████████| 212/212 [02:31<00:00,  1.40it/s, Epoch=3, LR=1.75e-5, Valid_Loss=2.15]


[34mValidation CMAP Improved (0.6814788944454856 ---> 0.7267016510266865)
Model Saved[0m



100%|██████████| 847/847 [12:44<00:00,  1.11it/s, Epoch=4, LR=0.00012, Train_Loss=1.94]
100%|██████████| 212/212 [02:29<00:00,  1.42it/s, Epoch=4, LR=0.00012, Valid_Loss=2.09]


[34mValidation CMAP Improved (0.7267016510266865 ---> 0.736862018617994)
Model Saved[0m



100%|██████████| 847/847 [12:50<00:00,  1.10it/s, Epoch=5, LR=0.000548, Train_Loss=1.57]
100%|██████████| 212/212 [02:33<00:00,  1.38it/s, Epoch=5, LR=0.000548, Valid_Loss=2.18]





100%|██████████| 847/847 [12:46<00:00,  1.10it/s, Epoch=6, LR=0.000935, Train_Loss=1.27]
100%|██████████| 212/212 [02:32<00:00,  1.39it/s, Epoch=6, LR=0.000935, Valid_Loss=2.5]





100%|██████████| 847/847 [12:50<00:00,  1.10it/s, Epoch=7, LR=0.000951, Train_Loss=1.08]
100%|██████████| 212/212 [02:31<00:00,  1.40it/s, Epoch=7, LR=0.000951, Valid_Loss=2.51]





100%|██████████| 847/847 [12:57<00:00,  1.09it/s, Epoch=8, LR=0.000582, Train_Loss=0.998]
100%|██████████| 212/212 [02:33<00:00,  1.38it/s, Epoch=8, LR=0.000582, Valid_Loss=2.34]


[34mValidation CMAP Improved (0.736862018617994 ---> 0.7532686383238025)
Model Saved[0m



100%|██████████| 847/847 [12:59<00:00,  1.09it/s, Epoch=9, LR=0.000143, Train_Loss=0.938]
100%|██████████| 212/212 [02:32<00:00,  1.39it/s, Epoch=9, LR=0.000143, Valid_Loss=2.08]


[34mValidation CMAP Improved (0.7532686383238025 ---> 0.7692280899743379)
Model Saved[0m



100%|██████████| 847/847 [13:03<00:00,  1.08it/s, Epoch=10, LR=9.85e-6, Train_Loss=0.789]
100%|██████████| 212/212 [02:31<00:00,  1.40it/s, Epoch=10, LR=9.85e-6, Valid_Loss=2.15]





100%|██████████| 847/847 [12:48<00:00,  1.10it/s, Epoch=11, LR=0.000296, Train_Loss=0.662]
100%|██████████| 212/212 [02:30<00:00,  1.41it/s, Epoch=11, LR=0.000296, Valid_Loss=2.3]





100%|██████████| 847/847 [12:43<00:00,  1.11it/s, Epoch=12, LR=0.000757, Train_Loss=0.53]
100%|██████████| 212/212 [02:29<00:00,  1.42it/s, Epoch=12, LR=0.000757, Valid_Loss=2.79]





100%|██████████| 847/847 [12:38<00:00,  1.12it/s, Epoch=13, LR=0.000999, Train_Loss=0.454]
100%|██████████| 212/212 [02:28<00:00,  1.43it/s, Epoch=13, LR=0.000999, Valid_Loss=2.7]





100%|██████████| 847/847 [12:39<00:00,  1.12it/s, Epoch=14, LR=0.000814, Train_Loss=0.405]
100%|██████████| 212/212 [02:28<00:00,  1.42it/s, Epoch=14, LR=0.000814, Valid_Loss=2.84]





100%|██████████| 847/847 [12:42<00:00,  1.11it/s, Epoch=15, LR=0.000361, Train_Loss=0.442]
100%|██████████| 212/212 [02:29<00:00,  1.42it/s, Epoch=15, LR=0.000361, Valid_Loss=2.52]





100%|██████████| 847/847 [12:43<00:00,  1.11it/s, Epoch=16, LR=2.74e-5, Train_Loss=0.427]
100%|██████████| 212/212 [02:28<00:00,  1.43it/s, Epoch=16, LR=2.74e-5, Valid_Loss=2.53]





100%|██████████| 847/847 [12:39<00:00,  1.11it/s, Epoch=17, LR=9.82e-5, Train_Loss=0.393]
100%|██████████| 212/212 [02:30<00:00,  1.41it/s, Epoch=17, LR=9.82e-5, Valid_Loss=2.59]





100%|██████████| 847/847 [12:45<00:00,  1.11it/s, Epoch=18, LR=0.000513, Train_Loss=0.338]
100%|██████████| 212/212 [02:30<00:00,  1.41it/s, Epoch=18, LR=0.000513, Valid_Loss=2.75]





100%|██████████| 847/847 [12:49<00:00,  1.10it/s, Epoch=19, LR=0.000917, Train_Loss=0.274]
100%|██████████| 212/212 [02:31<00:00,  1.40it/s, Epoch=19, LR=0.000917, Valid_Loss=2.96]





100%|██████████| 847/847 [12:50<00:00,  1.10it/s, Epoch=20, LR=0.000965, Train_Loss=0.242]
100%|██████████| 212/212 [02:31<00:00,  1.40it/s, Epoch=20, LR=0.000965, Valid_Loss=3.27]



Training complete in 5h 7m 5s
Best CMAP: 0.7692


In [25]:
# Close the W&B run opened by wandb.init above.
run.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train Loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁
Valid CMAP,▁▅▇▇▇▆▇▇███▇▇▇███▇▇▇
Valid Loss,█▃▁▁▁▃▃▂▁▁▂▄▃▄▃▃▃▃▄▅

0,1
Best CMAP Score,0.76923
Train Loss,0.24188
Valid CMAP,0.73
Valid Loss,3.27386
