import wandb
# Authenticate with Weights & Biases.
# Preferred path: read the API key stored under the "WANDB" Kaggle secret.
# Fallback: if that fails for any ordinary reason (e.g. `kaggle_secrets` is not
# importable outside Kaggle, or the secret lookup / login raises), log in
# anonymously instead.
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    api_key = user_secrets.get_secret("WANDB")
    # Login to wandb with the API key
    wandb.login(key=api_key)
    # Set anonymous mode to None
    anonymous = None
except Exception:
    # `except Exception` (rather than a bare `except:`) keeps the best-effort
    # fallback but lets KeyboardInterrupt / SystemExit propagate.
    # If Kaggle secrets are not available, set anonymous mode to 'must'
    anonymous = 'must'
    # Login to wandb anonymously and relogin if needed
    wandb.login(anonymous=anonymous, relogin=True)

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchaudio
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
import IPython.display as ipd
from datetime import datetime
import time

from torch.utils.data import DataLoader
from torchmetrics.classification import MulticlassAccuracy, MultilabelAccuracy
import audiomentations
from torch.utils.data import default_collate
from torchvision.transforms import v2

from src.audio_utils import play_audio, plot_specgram, plot_waveform
from src.data import AudioDataset, FrequencyMaskingAug, TimeMaskingAug
from src.data_utils import get_metadata, get_fold, get_metadata_from_csv
from src.train_utils import FocalLoss, get_cosine_schedule_with_warmup, wandb_init
from src.models import BasicClassifier
from src.utils import score_np, roc_auc

import ast
import wandb
import yaml

  from .autonotebook import tqdm as notebook_tqdm


### Config

In [2]:
class Config:
    """Hyperparameters and run settings, read elsewhere as ``Config.<name>``.

    Derived values (``audio_len``, ``hop_length``, ``date``, ``run_name``) are
    computed once, when this class body is executed. The training loop dumps
    every attribute whose name does not contain ``__`` to ``logs.txt``.
    """

    # --- Audio / spectrogram settings (forwarded to AudioDataset) ---
    duration = 10           # presumably seconds, given audio_len = duration * sample_rate
    sample_rate = 32000
    target_length = 384     # also passed to TimeMaskingAug and used in run_name
    n_mels = 128
    # NOTE(review): 2028 is not a power of two; possibly 2048 was intended - confirm.
    n_fft = 2028
    window = 2028
    audio_len = duration*sample_rate
    # Hop size chosen so that (target_length - 1) hops span audio_len samples.
    hop_length = audio_len // (target_length-1)
    fmin = 20
    fmax = 16000
    top_db = 80

    # --- Model / data split ---
    n_classes = 182
    batch_size = 24
    model_name = 'efficientnet_v2_s'
    n_folds = 5
    upsample_thr = 50       # passed to get_fold as up_thr
    use_class_weights = False   # Test: when True, class_weights from get_fold weight the loss

    # --- Input standardization (forwarded to AudioDataset) ---
    standardize = False
    dataset_mean = [-16.8828]
    dataset_std = [12.4019]

    # --- Augmentation and loss options ("Test" marks settings under experimentation) ---
    data_aug = True     # Test: when False, waveform and spectrogram transforms are set to None
    cutmix_mixup = False     # Test: when True, the train loader uses the CutMix/MixUp collate_fn
    loss = 'bce'    # Test ('crossentropy', 'bce')
    secondary_labels_weight = 0.3   # Test (0)
    use_focal = False    # Test (only with bce): adds focal_lambda * FocalLoss to the criterion
    focal_gamma = 2
    focal_lambda = 1
    label_smoothing = 0.05  # Only with crossentropy

    # --- Optimization schedule ---
    num_epochs = 10
    warmup_epochs = 0.5     # multiplied by steps-per-epoch to get the warmup step count
    lr = 1e-3
    start_lr = 0.01 # relative to lr
    final_lr = 0.01
    weight_decay = 0.0001

    # --- Experiment tracking ---
    wandb = True            # enables wandb_init / wandb.log / wandb finish in the training cell
    competition   = 'birdclef-2024' 
    _wandb_kernel = 'cvincent13'
    # Timestamp is taken when the class body runs, not when training starts.
    date = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # NOTE(review): `{0}` is the literal integer 0, so the name always contains
    # "fold-0" regardless of the `fold` variable used later - confirm this is intended.
    run_name = f"{date}_fold-{0}_dim-{n_mels}x{target_length}_model-{model_name}"
    wandb_group = 'FirstTests'

# Load the metadata table from a CSV file; the commented-out line below is the
# alternative that builds it via get_metadata(Config.n_folds).
#metadata = get_metadata(Config.n_folds)
metadata = get_metadata_from_csv('metadata.csv')

### Dataset

In [3]:
# Select the fold to train on and split the metadata into train / validation
# frames; get_fold also returns class_weights (used only when
# Config.use_class_weights is True).
fold = 0
train_df, valid_df, class_weights = get_fold(metadata, fold, up_thr=Config.upsample_thr)

Num Train: 22045, 182 classes |Num Valid: 4892, 182 classes


In [4]:
# Augmentation pipelines for the training split.
# The waveform pipeline is an audiomentations.Compose over the steps listed
# below; the spectrogram pipeline chains frequency masking and time masking.
waveform_steps = [
    audiomentations.Shift(min_shift=-0.5, max_shift=0.5, p=0.5),
    audiomentations.SevenBandParametricEQ(min_gain_db=-12., max_gain_db=12., p=0.5),
    audiomentations.AirAbsorption(
        min_temperature=10, max_temperature=20,
        min_humidity=30, max_humidity=90,
        min_distance=10, max_distance=100,
        p=1.,
    ),

    # Exactly one gain-style transform.
    # Open question from the author: how to handle waveforms that leave [-1, 1]?
    audiomentations.OneOf(
        [
            audiomentations.Gain(min_gain_db=-6., max_gain_db=6., p=1),
            audiomentations.GainTransition(min_gain_db=-12., max_gain_db=3., p=1),
        ],
        p=1.,
    ),

    # Exactly one additive-noise transform.
    audiomentations.OneOf(
        [
            audiomentations.AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.),
            audiomentations.AddGaussianSNR(min_snr_db=5., max_snr_db=40., p=1.),
            audiomentations.AddColorNoise(min_snr_db=5., max_snr_db=40.,
                                          min_f_decay=-3.01, max_f_decay=-3.01, p=1.),
        ],
        p=1.,
    ),

    # Disabled: noise mixing from an `unlabeled_dir` folder.
    #audiomentations.AddShortNoises(sounds_path=unlabeled_dir, min_snr_db=3., max_snr_db=30., 
    #                           noise_rms='relative_to_whole_input',
    #                           min_time_between_sounds=2., max_time_between_sounds=8., 
    #                           noise_transform=audiomentations.PolarityInversion(), p=0.5),
    #audiomentations.AddBackgroundNoise(sounds_path=unlabeled_dir, min_snr_db=3., max_snr_db=30., 
    #                               noise_transform=audiomentations.PolarityInversion(), p=0.5),

    audiomentations.LowPassFilter(min_cutoff_freq=750., max_cutoff_freq=7500.,
                                  min_rolloff=12, max_rolloff=24, p=0.8),
    audiomentations.PitchShift(min_semitones=-2.5, max_semitones=2.5, p=0.3),
]
waveform_transforms = audiomentations.Compose(waveform_steps)

freq_masking = FrequencyMaskingAug(0.3, 0.1, Config.n_mels, n_masks=3, mask_mode='mean')
time_masking = TimeMaskingAug(0.3, 0.1, Config.target_length, n_masks=3, mask_mode='mean')
spec_transforms = nn.Sequential(freq_masking, time_masking)

# Turn every augmentation off when Config.data_aug is disabled.
if not Config.data_aug:
    waveform_transforms = None
    spec_transforms = None


# Keyword arguments that are identical for the training and validation
# datasets; only the dataframe and the augmentation pipelines differ.
_shared_dataset_kwargs = dict(
    n_classes=Config.n_classes,
    duration=Config.duration,
    sample_rate=Config.sample_rate,
    target_length=Config.target_length,
    n_mels=Config.n_mels,
    n_fft=Config.n_fft,
    window=Config.window,
    hop_length=Config.hop_length,
    fmin=Config.fmin,
    fmax=Config.fmax,
    top_db=Config.top_db,
    standardize=Config.standardize,
    mean=Config.dataset_mean,
    std=Config.dataset_std,
    loss=Config.loss,
    secondary_labels_weight=Config.secondary_labels_weight,
)

# Training data: augmented (unless the pipelines were set to None above).
train_dataset = AudioDataset(
    train_df,
    waveform_transforms=waveform_transforms,
    spec_transforms=spec_transforms,
    **_shared_dataset_kwargs,
)

# Validation data: never augmented.
val_dataset = AudioDataset(
    valid_df,
    waveform_transforms=None,
    spec_transforms=None,
    **_shared_dataset_kwargs,
)

### Training

In [5]:
# Batch-level mixing: RandomApply(p=0.7) wraps a RandomChoice between CutMix
# and MixUp with weights 0.65 / 0.35. One-hot labels are requested only for
# the BCE loss.
_one_hot_for_mix = Config.loss == 'bce'
_cutmix = v2.CutMix(num_classes=Config.n_classes, alpha=0.5, one_hot_labels=_one_hot_for_mix)
_mixup = v2.MixUp(num_classes=Config.n_classes, alpha=0.5, one_hot_labels=_one_hot_for_mix)
cutmix_or_mixup = v2.RandomApply(
    [v2.RandomChoice([_cutmix, _mixup], p=[0.65, 0.35])],
    p=0.7,
)


def mix_collate_fn(batch):
    """Collate `batch` with default_collate, then pass the result through cutmix_or_mixup."""
    collated = default_collate(batch)
    return cutmix_or_mixup(*collated)


# The mixing collate function is used for training only when enabled in Config.
collate_fn = None
if Config.cutmix_mixup:
    collate_fn = mix_collate_fn

train_loader = DataLoader(
    train_dataset,
    batch_size=Config.batch_size,
    shuffle=True,
    num_workers=6,
    collate_fn=collate_fn,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=Config.batch_size,
    shuffle=False,
    num_workers=6,
)

In [6]:
# Use the GPU when one is available; otherwise fall back to the CPU instead of
# failing on the first `.to(device)`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = BasicClassifier(Config.n_classes, Config.model_name).to(device)
optimizer = torch.optim.Adam(model.parameters(), weight_decay=Config.weight_decay, lr=Config.lr)

# Steps per epoch: the scheduler is stepped once per training batch, so warmup
# and total lengths are expressed in batches.
spe = len(train_loader)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=spe*Config.warmup_epochs,
    num_training_steps=spe*Config.num_epochs,
    start_lr=Config.start_lr,
    final_lr=Config.final_lr,
)

# Optional class weighting, shared by the main criterion and the focal loss.
pos_weight = torch.tensor(class_weights).to(device) if Config.use_class_weights else None

if Config.loss == 'crossentropy':
    criterion = nn.CrossEntropyLoss(label_smoothing=Config.label_smoothing, weight=pos_weight)
    accuracy = MulticlassAccuracy(num_classes=Config.n_classes).to(device)
elif Config.loss == 'bce':
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight, weight=None)
    accuracy = MultilabelAccuracy(num_labels=Config.n_classes).to(device)
else:
    # Fail here with a clear message rather than with a NameError on
    # `criterion` / `accuracy` inside the training loop.
    raise ValueError(
        f"Unsupported Config.loss {Config.loss!r}; expected 'crossentropy' or 'bce'"
    )

# Always built; only added to the loss when Config.use_focal is True.
focal_criterion = FocalLoss(gamma=Config.focal_gamma, pos_weight=pos_weight)

### Training loop

In [7]:
# Train for Config.num_epochs epochs. Each epoch runs one training pass and one
# validation pass, then writes a checkpoint, rewrites logs.txt, optionally logs
# to wandb, and prints a one-line summary.
#
# The loop is deliberately kept flat at module level: later notebook cells
# inspect `labels`, `out`, `preds` and `gt` from the most recent batch / pass.
start_time = time.time()
if Config.wandb:
    run = wandb_init(fold, Config)

save_dir = f"checkpoints/{Config.run_name}"
train_losses = []
val_losses = []
train_metrics = {'AUC': [], 'Accuracy': [], 'Score': []}
val_metrics = {'AUC': [], 'Accuracy': [], 'Score': []}

for epoch in range(Config.num_epochs):
    # ------------------------------------------------------------ training
    train_loss = 0
    train_accuracy = 0
    gt = []
    preds = []
    model.train()
    train_iter = tqdm(train_loader)
    for (batch, labels) in train_iter:
        optimizer.zero_grad()

        batch = batch.to(device)
        labels = labels.to(device)

        out = model(batch)
        if Config.use_focal:
            loss = criterion(out, labels) + Config.focal_lambda * focal_criterion(out, labels)
        else:
            loss = criterion(out, labels)
        loss.backward()
        optimizer.step()
        # The schedule is defined in optimizer steps, so advance it every batch.
        scheduler.step()

        train_iter.set_description(desc=f'train loss: {loss.item():.3f}')
        train_loss += loss.item()

        # Accuracy targets depend on the label format:
        #  - bce: labels are per-class weights, so any value > 0 counts as positive;
        #  - crossentropy: labels are class indices and must be passed unchanged
        #    (binarising them would compare predictions against classes 0/1 only).
        acc_target = (labels > 0).int() if Config.loss == 'bce' else labels
        # .item() keeps plain floats in the metric history (checkpoint / wandb)
        # instead of device tensors.
        train_accuracy += accuracy(out, acc_target).item()

        if Config.loss == 'bce':
            gt.append(((labels.detach().cpu().numpy())>0).astype(int))
            preds.append(out.sigmoid().detach().cpu().numpy())
        elif Config.loss == 'crossentropy':
            gt.append(nn.functional.one_hot(labels.detach().cpu(), num_classes=Config.n_classes).numpy())
            preds.append(nn.functional.softmax(out, dim=1).detach().cpu().numpy())

    train_loss = train_loss / len(train_loader)
    train_losses.append(train_loss)
    train_accuracy = train_accuracy / len(train_loader)
    train_metrics["Accuracy"].append(train_accuracy)
    gt = np.concatenate(gt)
    preds = np.concatenate(preds)
    train_auc = roc_auc(preds, gt)
    train_score = score_np(preds, gt)
    train_metrics["AUC"].append(train_auc)
    train_metrics["Score"].append(train_score)

    # ---------------------------------------------------------- validation
    val_loss = 0
    val_accuracy = 0
    gt = []
    preds = []
    model.eval()
    val_iter = tqdm(val_loader)
    for (batch, labels) in val_iter:
        batch = batch.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            out = model(batch)
            if Config.use_focal:
                loss = criterion(out, labels) + Config.focal_lambda * focal_criterion(out, labels)
            else:
                loss = criterion(out, labels)

        val_iter.set_description(desc=f'val loss: {loss.item():.3f}')
        val_loss += loss.item()

        # Same target handling as in the training pass.
        acc_target = (labels > 0).int() if Config.loss == 'bce' else labels
        val_accuracy += accuracy(out, acc_target).item()

        if Config.loss == 'bce':
            gt.append(((labels.detach().cpu().numpy())>0).astype(int))
            preds.append(out.sigmoid().detach().cpu().numpy())
        elif Config.loss == 'crossentropy':
            gt.append(nn.functional.one_hot(labels.detach().cpu(), num_classes=Config.n_classes).numpy())
            preds.append(nn.functional.softmax(out, dim=1).detach().cpu().numpy())

    val_loss = val_loss / len(val_loader)
    val_losses.append(val_loss)
    val_accuracy = val_accuracy / len(val_loader)
    val_metrics['Accuracy'].append(val_accuracy)
    gt = np.concatenate(gt)
    preds = np.concatenate(preds)
    val_auc = roc_auc(preds, gt)
    val_score = score_np(preds, gt)
    val_metrics['AUC'].append(val_auc)
    val_metrics['Score'].append(val_score)

    # ---------------------------------------------------------- checkpoint
    save_dict = {
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "scheduler": scheduler.state_dict(),
        "epoch": epoch+1,
        "train_losses": train_losses,
        "train_metrics": train_metrics,
        "val_losses": val_losses,
        "val_metrics": val_metrics
    }

    # makedirs also creates the parent "checkpoints" directory on a fresh
    # checkout and is a no-op when the run directory already exists.
    os.makedirs(save_dir, exist_ok=True)
    torch.save(save_dict, save_dir + "/checkpoint.pth")
    # logs.txt is rewritten every epoch, so it holds the latest epoch summary
    # followed by a dump of the Config attributes.
    with open(save_dir + "/logs.txt", "w") as f:
        f.write(f"Epoch {epoch+1}: Train Loss = {train_loss:.3f} | Val Loss = {val_loss:.3f}")
        f.write("\n")
        f.write("CONFIG:")
        for k,v in dict(vars(Config)).items():
            if '__' not in k:
                f.write("\n")
                f.write(f"{k}: {v}")

    if Config.wandb:
        # NOTE(review): "train accuracy" uses a space while the other keys use
        # underscores; left unchanged so existing wandb charts keep their key.
        wandb.log({
            "train_loss": train_loss,
            "train accuracy": train_accuracy,
            "train_auc": train_auc,
            "train_score": train_score,
            "val_loss": val_loss,
            "val_accuracy": val_accuracy,
            "val_auc": val_auc,
            "val_score": val_score,
            # get_last_lr() returns one value per parameter group; the
            # optimizer has a single group, so log it as a scalar.
            "lr": scheduler.get_last_lr()[0]
        })

    print(
        f'Epoch {epoch+1}: Train Loss = {train_loss:.3f}, Train Accuracy = {train_accuracy:.3f}, '
        f'Train ROCAUC = {train_auc:.3f}, Train score = {train_score:.3f} | '
        f'Val Loss = {val_loss:.3f}, Val Accuracy = {val_accuracy:.3f}, '
        f'Val ROCAUC = {val_auc:.3f}, Val score = {val_score:.3f}'
    )


def format_duration(seconds):
    """Render a duration given in seconds as ``"HHh MMmin SSs"``.

    Each component is truncated to an integer and zero-padded to at least two
    digits; hours are not capped, so long durations simply use more digits.
    """
    whole_hours, leftover = divmod(seconds, 3600)
    whole_minutes, leftover_seconds = divmod(leftover, 60)
    h, m, s = int(whole_hours), int(whole_minutes), int(leftover_seconds)
    return f"{h:02}h {m:02}min {s:02}s"

# Report the total wall-clock time since `start_time` was recorded.
elapsed_seconds = time.time() - start_time
print(f'Done in {format_duration(elapsed_seconds)}')

# Close the wandb run and embed its dashboard page in the notebook output.
if Config.wandb:
    #print('# WandB')
    #log_wandb(valid_df)
    wandb.run.finish()
    run_page = ipd.IFrame(run.url, width=1080, height=720)
    display(run_page)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcvincent13[0m ([33m667[0m). Use [1m`wandb login --relogin`[0m to force relogin


train loss: 0.035:  49%|████▊     | 446/919 [06:38<03:40,  2.14it/s]wandb: Network error (ConnectionError), entering retry loop.
train loss: 0.031: 100%|██████████| 919/919 [13:32<00:00,  1.13it/s]
val loss: 0.036: 100%|██████████| 204/204 [01:05<00:00,  3.11it/s]


Epoch 1: Train Loss = 0.066, Train Accuracy = 0.982, Train ROCAUC = 0.509,Train score = 0.509 | Val Loss = 0.038, Val Accuracy = 0.994, Val ROCAUC = 0.597, Val score = 0.597


train loss: 0.034: 100%|██████████| 919/919 [13:19<00:00,  1.15it/s]
val loss: 0.032: 100%|██████████| 204/204 [01:04<00:00,  3.17it/s]


Epoch 2: Train Loss = 0.035, Train Accuracy = 0.994, Train ROCAUC = 0.520,Train score = 0.520 | Val Loss = 0.035, Val Accuracy = 0.994, Val ROCAUC = 0.556, Val score = 0.556


train loss: 0.034: 100%|██████████| 919/919 [13:16<00:00,  1.15it/s]
val loss: 0.030: 100%|██████████| 204/204 [01:03<00:00,  3.19it/s]


Epoch 3: Train Loss = 0.034, Train Accuracy = 0.994, Train ROCAUC = 0.497,Train score = 0.497 | Val Loss = 0.033, Val Accuracy = 0.994, Val ROCAUC = 0.571, Val score = 0.571


train loss: 0.034: 100%|██████████| 919/919 [13:16<00:00,  1.15it/s]
val loss: 0.030: 100%|██████████| 204/204 [01:04<00:00,  3.17it/s]


Epoch 4: Train Loss = 0.034, Train Accuracy = 0.994, Train ROCAUC = 0.496,Train score = 0.496 | Val Loss = 0.033, Val Accuracy = 0.994, Val ROCAUC = 0.498, Val score = 0.498


train loss: 0.033: 100%|██████████| 919/919 [13:15<00:00,  1.15it/s]
val loss: 0.030: 100%|██████████| 204/204 [01:04<00:00,  3.18it/s]


Epoch 5: Train Loss = 0.034, Train Accuracy = 0.994, Train ROCAUC = 0.493,Train score = 0.493 | Val Loss = 0.033, Val Accuracy = 0.994, Val ROCAUC = 0.586, Val score = 0.586


train loss: 0.033: 100%|██████████| 919/919 [13:15<00:00,  1.16it/s]
val loss: 0.027: 100%|██████████| 204/204 [01:04<00:00,  3.18it/s]


Epoch 6: Train Loss = 0.034, Train Accuracy = 0.994, Train ROCAUC = 0.517,Train score = 0.517 | Val Loss = 0.032, Val Accuracy = 0.994, Val ROCAUC = 0.621, Val score = 0.621


train loss: 0.031: 100%|██████████| 919/919 [13:13<00:00,  1.16it/s]
val loss: 0.027: 100%|██████████| 204/204 [01:03<00:00,  3.19it/s]


Epoch 7: Train Loss = 0.033, Train Accuracy = 0.994, Train ROCAUC = 0.530,Train score = 0.530 | Val Loss = 0.032, Val Accuracy = 0.994, Val ROCAUC = 0.624, Val score = 0.624


train loss: 0.035: 100%|██████████| 919/919 [13:13<00:00,  1.16it/s]
val loss: 0.025: 100%|██████████| 204/204 [01:04<00:00,  3.19it/s]


Epoch 8: Train Loss = 0.033, Train Accuracy = 0.994, Train ROCAUC = 0.540,Train score = 0.540 | Val Loss = 0.032, Val Accuracy = 0.994, Val ROCAUC = 0.613, Val score = 0.613


train loss: 0.033: 100%|██████████| 919/919 [13:21<00:00,  1.15it/s]
val loss: 0.026: 100%|██████████| 204/204 [01:03<00:00,  3.20it/s]


Epoch 9: Train Loss = 0.033, Train Accuracy = 0.994, Train ROCAUC = 0.550,Train score = 0.550 | Val Loss = 0.032, Val Accuracy = 0.994, Val ROCAUC = 0.669, Val score = 0.669


  0%|          | 0/919 [00:03<?, ?it/s]


KeyboardInterrupt: 

In [8]:
# Manual cleanup: the training cell above ended with a KeyboardInterrupt before
# reaching its own wandb finish, so the run is closed and its page displayed here.
wandb.run.finish()
display(ipd.IFrame(run.url, width=1080, height=720))

0,1
train accuracy,▁████████
train_auc,▃▄▂▁▁▄▆▇█
train_loss,█▁▁▁▁▁▁▁▁
train_score,▃▄▂▁▁▄▆▇█
val_accuracy,▁▁▁▁▁▁▁▁▁
val_auc,▅▃▄▁▅▆▆▆█
val_loss,█▅▂▂▂▁▁▁▁
val_score,▅▃▄▁▅▆▆▆█

0,1
train accuracy,0.9939
train_auc,0.55018
train_loss,0.0332
train_score,0.55018
val_accuracy,0.99397
val_auc,0.66944
val_loss,0.03185
val_score,0.66944


In [11]:
# Scratch check: shape of the one-hot encoding of the current `labels` batch.
nn.functional.one_hot(labels.detach().cpu(), num_classes=182).numpy().shape

(13, 182)

In [13]:
# Scratch check: show the current `labels` batch.
labels

tensor([ 43,  13, 145, 152,  27, 137, 132,  62, 107, 109,  82,  17, 180],
       device='cuda:0')

In [14]:
# Scratch check: shape of the concatenated prediction array.
preds.shape

(22045, 182)

In [17]:
# Scratch check: class probabilities (softmax over dim 1) for the current `out` logits.
nn.functional.softmax(out, dim=1)

tensor([[9.4180e-06, 4.2658e-04, 5.8844e-05,  ..., 3.0927e-05, 7.2497e-06,
         1.0027e-05],
        [1.2548e-04, 3.9156e-04, 7.0476e-06,  ..., 2.0452e-05, 8.9177e-05,
         5.7237e-04],
        [4.5435e-03, 6.3721e-03, 4.4781e-03,  ..., 4.3707e-03, 9.0999e-04,
         1.8922e-02],
        ...,
        [7.9350e-03, 5.7408e-04, 1.5541e-03,  ..., 1.1247e-03, 1.5814e-04,
         1.6370e-02],
        [5.3954e-03, 1.9911e-02, 3.6692e-04,  ..., 9.7607e-04, 6.4620e-03,
         5.6922e-04],
        [9.3946e-04, 5.5925e-03, 3.4661e-04,  ..., 2.3105e-04, 1.0715e-01,
         1.1536e-04]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [15]:
# Scratch check: show the concatenated prediction array.
preds

array([[0.36691537, 0.48500904, 0.4609493 , ..., 0.5471893 , 0.42748517,
        0.5710229 ],
       [0.52537864, 0.4471284 , 0.5180808 , ..., 0.47096235, 0.5777382 ,
        0.41334236],
       [0.47061932, 0.52697694, 0.55479383, ..., 0.61512756, 0.4984799 ,
        0.484453  ],
       ...,
       [0.46711853, 0.0596365 , 0.14653054, ..., 0.11051289, 0.01716946,
        0.64393234],
       [0.39192435, 0.7040214 , 0.04199148, ..., 0.10442515, 0.43564963,
        0.06366991],
       [0.22335546, 0.6312654 , 0.09592591, ..., 0.06605653, 0.9704151 ,
        0.03410903]], dtype=float32)