In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchaudio
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
import IPython.display as ipd
from datetime import datetime
import time

from torch.utils.data import DataLoader
from torchmetrics.classification import MulticlassAccuracy, MultilabelAccuracy
import audiomentations
from torch.utils.data import default_collate
from torchvision.transforms import v2
import timm

from src.audio_utils import play_audio, plot_specgram, plot_waveform
from src.data import AudioDataset, FrequencyMaskingAug, TimeMaskingAug, CutMix, MixUp
from src.data_utils import get_metadata, get_fold, get_metadata_from_csv
from src.train_utils import FocalLoss, BCEFocal2WayLoss, get_cosine_schedule_with_warmup, wandb_init, train_one_epoch, eval_one_epoch
from src.models import BasicClassifier, GeMClassifier, SEDClassifier
from src.utils import score_np, roc_auc

import ast
import wandb
import yaml

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Exploration: build a pretrained DyMN backbone in feature-extraction mode
# (features_only=True) and request the outputs of the listed layer indices.
from src.efficientat.dymn.model import get_model as get_dymn

model = get_dymn(
    width_mult=1.,
    pretrained_name="dymn10_as",
    num_classes=182,
    features_only=True,
    out_indices=(1, 2, 3, 4, 5, 6, 10, 12, 15, 16),
)

Downloading: "https://github.com/fschmid56/EfficientAT/releases/download/v0.0.1/dymn10_as.pt" to resources/dymn10_as.pt
100%|██████████| 40.5M/40.5M [00:06<00:00, 6.47MB/s]


Features only is true, Dropping final fully-connected layer and loading weights in non-strict mode
DyMN(
  (layers): ModuleList(
    (0): DY_Block(
      (exp_conv): DynamicWrapper(
        (module): Identity()
      )
      (exp_norm): Identity()
      (exp_act): DynamicWrapper(
        (module): Identity()
      )
      (depth_conv): DynamicConv(
        (residuals): Sequential(
          (0): Linear(in_features=32, out_features=4, bias=True)
        )
      )
      (depth_norm): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (depth_act): DyReLUB(
        (coef_net): Sequential(
          (0): Linear(in_features=32, out_features=64, bias=True)
        )
        (sigmoid): Sigmoid()
      )
      (ca): CoordAtt()
      (proj_conv): DynamicConv(
        (residuals): Sequential(
          (0): Linear(in_features=32, out_features=4, bias=True)
        )
      )
      (proj_norm): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_

In [3]:
# Random dummy batch of shape (2, 1, 128, 128), used only to probe output shapes.
a = torch.randn(2, 1, 128, 128)
# The call is unpacked into two values: `preds` and an iterable of feature maps.
preds, features = model(a)

In [5]:
# Print the shape of every feature map returned for the dummy batch.
for feature_map in features:
    print(feature_map.shape)

torch.Size([2, 16, 64, 64])
torch.Size([2, 24, 32, 32])
torch.Size([2, 24, 32, 32])
torch.Size([2, 40, 16, 16])
torch.Size([2, 40, 16, 16])
torch.Size([2, 40, 16, 16])
torch.Size([2, 80, 8, 8])
torch.Size([2, 112, 8, 8])
torch.Size([2, 160, 4, 4])
torch.Size([2, 960, 4, 4])


In [4]:
# Shape of the first value returned by the model for the dummy batch.
preds.shape

torch.Size([2, 960, 4, 4])

In [11]:
# Exploration: same probe with the MN backbone, again in feature-extraction
# mode (features_only=True) with a shorter list of output layer indices.
from src.efficientat.mn.model import get_model as get_mn

model = get_mn(
    width_mult=1.,
    pretrained_name="mn10_as",
    num_classes=182,
    features_only=True,
    out_indices=(1, 3, 6, 10, 12, 15, 16),
)

Features only is true, Dropping final fully-connected layer and loading weights in non-strict mode
MN(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvN



In [12]:
# Random dummy batch of shape (2, 1, 128, 128), used only to probe output shapes.
a = torch.randn(2, 1, 128, 128)
# The call is unpacked into two values: `preds` and an iterable of feature maps.
preds, features = model(a)

In [13]:
# Shape of the first value returned by the model for the dummy batch.
preds.shape

torch.Size([2, 960, 4, 4])

In [14]:
# Print the shape of every feature map returned for the dummy batch.
for feature_map in features:
    print(feature_map.shape)

torch.Size([2, 16, 64, 64])
torch.Size([2, 24, 32, 32])
torch.Size([2, 40, 16, 16])
torch.Size([2, 80, 8, 8])
torch.Size([2, 112, 8, 8])
torch.Size([2, 160, 4, 4])
torch.Size([2, 960, 4, 4])


### Config

In [2]:
class Config:
    """Experiment configuration, used as a plain namespace of class attributes.

    The class itself (not an instance) is passed to the dataset, the
    train/eval helpers and ``wandb_init``, and its attributes are dumped to
    ``logs.txt`` by the training cell.
    """

    # --- Audio clip / mel-spectrogram settings ---
    # Plain string: a trailing comma here would silently turn it into the
    # 1-tuple ('first',), which never compares equal to 'first'.
    start_idx = 'first'
    duration = 5          # clip length in seconds (audio_len = duration * sample_rate)
    sample_rate = 32000
    target_length = 256
    n_mels = 256
    # NOTE(review): 2028 is an unusual FFT/window size (2048 is the common
    # power of two) -- confirm it is intended. It is left unchanged because
    # altering it would change the spectrograms fed to the models.
    n_fft = 2028
    window = 2028
    audio_len = duration*sample_rate
    # Hop chosen so that audio_len samples span target_length frames.
    hop_length = audio_len // (target_length-1)
    fmin = 20
    fmax = 16000
    top_db = 80

    # --- Model / data split ---
    n_classes = 182
    batch_size = 20
    Model = SEDClassifier
    model_name = 'tf_efficientnetv2_s'
    n_folds = 5
    upsample_thr = 25          # forwarded to get_fold(..., up_thr=...)
    use_class_weights = True   # Test

    # --- Input standardization ---
    standardize = False
    dataset_mean = [-16.8828]
    dataset_std = [12.4019]

    # --- Augmentation and loss ---
    data_aug = True     # Test
    cutmix_mixup = False     # Test
    loss = 'crossentropy'    # Test ('crossentropy', 'bce')
    secondary_labels_weight = 0.3   # Test (0)
    use_focal = False    # Test (only with bce)
    use_2wayfocal = False
    focal_gamma = 2
    focal_lambda = 1
    label_smoothing = 0.05  # Only with crossentropy

    # --- Optimization schedule ---
    folds = [2,3,4]
    num_epochs = 9
    warmup_epochs = 1
    lr = 1e-4
    start_lr = 0.01 # relative to lr
    final_lr = 0.01
    weight_decay = 0.0001
    max_grad_norm = 10

    # --- Experiment tracking ---
    wandb = True
    competition   = 'birdclef-2024'
    _wandb_kernel = 'cvincent13'
    # Evaluated once, when this class body runs; all folds share the same run_name prefix.
    date = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    run_name = f"{date}_{n_mels}x{target_length}_{model_name}"
    wandb_group = 'FullCV'

    # --- Paths ---
    base_dir = ''
    short_noises = 'data/background/esc50/use_label'
    background_noises = ['data/background/aicrowd2020_noise_30sec/noise_30sec',
                         'data/background/ff1010bird_nocall/nocall',
                         #'data/background/train_soundscapes/nocall'
                        ]

# Load the metadata table from 'metadata.csv'.
# NOTE(review): the second argument ('data') is presumably the data root directory -- confirm in src.data_utils.
# Alternative kept for reference: build the metadata from scratch with Config.n_folds folds.
#metadata = get_metadata(Config.n_folds)
metadata = get_metadata_from_csv('metadata.csv', 'data')

### Dataset

In [3]:
# Data transforms and augmentations
# Data transforms and augmentations.
# The pipelines are only constructed when Config.data_aug is enabled: the noise
# transforms look up their sound files when they are created, so building them
# unconditionally would require the noise folders even with augmentation off.
if Config.data_aug:
    # Waveform-level augmentations, applied to the raw audio.
    waveform_transforms = audiomentations.Compose([
        audiomentations.Shift(min_shift=-0.5, max_shift=0.5, p=0.5),
        audiomentations.SevenBandParametricEQ(min_gain_db=-12., max_gain_db=12., p=0.2),
        audiomentations.AirAbsorption(min_temperature=10, max_temperature=20, min_humidity=30, max_humidity=90,
                                      min_distance=10, max_distance=100, p=0.8),

        # Exactly one of the two gain perturbations, half of the time.
        audiomentations.OneOf([
            audiomentations.Gain(min_gain_db=-4., max_gain_db=4., p=1),
            audiomentations.GainTransition(min_gain_db=-12., max_gain_db=3., p=1)
        ], p=0.5),

        # Exactly one of the two synthetic noise types, half of the time.
        audiomentations.OneOf([
            audiomentations.AddGaussianSNR(min_snr_db=5., max_snr_db=40., p=1.),
            audiomentations.AddColorNoise(min_snr_db=5., max_snr_db=40., min_f_decay=-3.01, max_f_decay=-3.01, p=1.)
        ], p=0.5),

        # Recorded noises read from the folders configured in Config.
        audiomentations.AddShortNoises(sounds_path=Config.short_noises, min_snr_db=5., max_snr_db=30.,
                                       noise_rms='relative_to_whole_input',
                                       min_time_between_sounds=2., max_time_between_sounds=8.,
                                       noise_transform=audiomentations.PolarityInversion(), p=0.4),
        audiomentations.AddBackgroundNoise(sounds_path=Config.background_noises, min_snr_db=5., max_snr_db=30.,
                                           noise_transform=audiomentations.PolarityInversion(), p=0.4),

        audiomentations.LowPassFilter(min_cutoff_freq=750., max_cutoff_freq=7500., min_rolloff=12, max_rolloff=24, p=0.5),
        audiomentations.PitchShift(min_semitones=-2.5, max_semitones=2.5, p=0.3)
    ])

    # Spectrogram-level augmentations: frequency masking followed by time masking.
    spec_transforms = nn.Sequential(
        FrequencyMaskingAug(0.4, 0.1, Config.n_mels, n_masks=3, mask_mode='mean'),
        TimeMaskingAug(0.4, 0.1, Config.target_length, n_masks=3, mask_mode='mean'),
    )
else:
    waveform_transforms = None
    spec_transforms = None


# Batch-level mixing: with probability 0.5, apply one of CutMix / MixUp,
# chosen with weights 0.65 / 0.35. Both share the same settings.
_mix_kwargs = dict(
    num_classes=Config.n_classes,
    alpha=0.5,
    one_hot_labels=(Config.loss == 'bce'),
)
_mix_choice = v2.RandomChoice(
    [CutMix(**_mix_kwargs), MixUp(**_mix_kwargs)],
    p=[0.65, 0.35],
)
cutmix_or_mixup = v2.RandomApply([_mix_choice], p=0.5)


def mix_collate_fn(batch):
    """Collate ``batch`` with ``default_collate``, then pass the result through ``cutmix_or_mixup``."""
    collated = default_collate(batch)
    # The collated elements are unpacked as positional arguments of the mixing transform.
    return cutmix_or_mixup(*collated)

# Use the mixing collate function only when Config.cutmix_mixup is enabled;
# None lets the DataLoader fall back to its default collation.
if Config.cutmix_mixup:
    collate_fn = mix_collate_fn
else:
    collate_fn = None


### Training

In [4]:
def format_duration(seconds):
    """Return ``seconds`` formatted as 'HHh MMmin SSs' (fractions of a second are truncated)."""
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "{:02}h {:02}min {:02}s".format(int(hours), int(minutes), int(seconds))


# Use the GPU when one is available; otherwise fall back to the CPU instead of
# failing when the model is moved to the device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Train one model per fold listed in Config.folds. Each fold gets a fresh model,
# optimizer, scheduler, checkpoint directory and (optionally) wandb run.
for fold in Config.folds:
    print(f"Training fold {fold}")
    # Read data
    train_df, valid_df, class_weights = get_fold(metadata, fold, up_thr=Config.upsample_thr)

    # Dataloaders (augmentations and the mixing collate function are used for training only)
    train_dataset = AudioDataset(train_df, Config, waveform_transforms=waveform_transforms, spec_transforms=spec_transforms)
    val_dataset = AudioDataset(valid_df, Config, waveform_transforms=None, spec_transforms=None)
    train_loader = DataLoader(train_dataset, batch_size=Config.batch_size, shuffle=True, num_workers=4, collate_fn=collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=Config.batch_size, shuffle=False, num_workers=4)

    # Model and optim
    model = Config.Model(Config.n_classes, Config.model_name, n_mels=Config.n_mels).to(device)
    optimizer = torch.optim.Adam(model.parameters(), weight_decay=Config.weight_decay, lr=Config.lr)
    spe = len(train_loader)  # steps per epoch; the schedule is expressed in optimizer steps
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=spe*Config.warmup_epochs, num_training_steps=spe*Config.num_epochs,
                                                start_lr=Config.start_lr, final_lr=Config.final_lr)

    # Losses and metrics
    pos_weight = torch.tensor(class_weights).to(device) if Config.use_class_weights else None
    if Config.loss == 'crossentropy':
        criterion = nn.CrossEntropyLoss(label_smoothing=Config.label_smoothing, weight=pos_weight)
        accuracy = MulticlassAccuracy(num_classes=Config.n_classes).to(device)
    elif Config.loss == 'bce':
        criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight, weight=None)
        accuracy = MultilabelAccuracy(num_labels=Config.n_classes).to(device)
    else:
        # Fail early with a clear message instead of a NameError on `criterion` later on.
        raise ValueError(f"Unsupported Config.loss: {Config.loss!r} (expected 'crossentropy' or 'bce')")

    focal_criterion = FocalLoss(gamma=Config.focal_gamma, pos_weight=pos_weight)
    focal2way_criterion = BCEFocal2WayLoss(gamma=Config.focal_gamma, pos_weight=pos_weight)

    # Start training
    start_time = time.time()
    if Config.wandb:
        run = wandb_init(fold, Config)

    save_dir = f"{Config.base_dir}checkpoints/{Config.run_name}" + f"_fold-{fold}"
    train_losses = []
    val_losses = []
    train_metrics = {'AUC': [], 'Accuracy': [], 'Score': []}
    val_metrics = {'AUC': [], 'Accuracy': [], 'Score': []}

    for epoch in range(Config.num_epochs):
        # --- Train ---
        train_loss, train_accuracy, gt, preds = train_one_epoch(Config, model, train_loader, device, optimizer, scheduler,
                                                                criterion, accuracy, focal_criterion, focal2way_criterion)
        train_losses.append(train_loss)
        train_metrics["Accuracy"].append(train_accuracy)
        train_auc = roc_auc(preds, gt)
        train_score = score_np(preds, gt)
        train_metrics["AUC"].append(train_auc)
        train_metrics["Score"].append(train_score)

        # --- Validate (gt / preds are overwritten with the validation values) ---
        # NOTE(review): the logged accuracies stay around 0.00 while ROC-AUC reaches
        # ~0.98; the accuracy computed inside train_one_epoch / eval_one_epoch should be checked.
        val_loss, val_accuracy, gt, preds = eval_one_epoch(Config, model, val_loader, device, criterion,
                                                         accuracy, focal_criterion, focal2way_criterion)
        val_losses.append(val_loss)
        val_metrics['Accuracy'].append(val_accuracy)
        val_auc = roc_auc(preds, gt)
        val_score = score_np(preds, gt)
        val_metrics['AUC'].append(val_auc)
        val_metrics['Score'].append(val_score)

        # --- Checkpoint: the same file is overwritten after every epoch ---
        save_dict = {
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
            "epoch": epoch+1,
            "train_losses": train_losses,
            "train_metrics": train_metrics,
            "val_losses": val_losses,
            "val_metrics": val_metrics
        }

        # makedirs also creates the parent 'checkpoints' directory when it does not exist yet.
        os.makedirs(save_dir, exist_ok=True)
        torch.save(save_dict, save_dir + "/checkpoint.pth")
        # logs.txt is rewritten each epoch: it holds the latest epoch summary plus the config dump.
        with open(save_dir + "/logs.txt", "w") as f:
            f.write(f"Epoch {epoch+1}: Train Loss = {train_loss:.3f} | Val Loss = {val_loss:.3f}")
            f.write("\n")
            f.write("CONFIG:")
            for k,v in dict(vars(Config)).items():
                # Skip dunder attributes such as __module__ / __dict__.
                if '__' not in k:
                    f.write("\n")
                    f.write(f"{k}: {v}")


        if Config.wandb:
            # NOTE(review): "train accuracy" uses a space while every other key uses an
            # underscore; the key is kept as-is so new runs stay comparable with earlier ones.
            wandb.log({
                "train_loss": train_loss,
                "train accuracy": train_accuracy,
                "train_auc": train_auc,
                "train_score": train_score,
                "val_loss": val_loss,
                "val_accuracy": val_accuracy,
                "val_auc": val_auc,
                "val_score": val_score,
                # get_last_lr() returns one value per parameter group; log the scalar
                # of the single group so wandb can chart it.
                "lr": scheduler.get_last_lr()[0]
            })


        print(f'Epoch {epoch+1}: Train Loss = {train_loss:.3f}, Train Accuracy = {train_accuracy:.3f}, Train ROCAUC = {train_auc:.3f},\
    Train score = {train_score:.3f} | Val Loss = {val_loss:.3f}, Val Accuracy = {val_accuracy:.3f}, \
    Val ROCAUC = {val_auc:.3f}, Val score = {val_score:.3f}')


    print(f'Done in {format_duration(time.time() - start_time)}')

    if Config.wandb:
        #print('# WandB')
        #log_wandb(valid_df)
        wandb.run.finish()

Training fold 2
Num Train: 20162, 182 classes | Num Valid: 4892, 182 classes


Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mcvincent13[0m ([33m667[0m). Use [1m`wandb login --relogin`[0m to force relogin


train loss: 5.888: 100%|██████████| 1009/1009 [10:09<00:00,  1.66it/s]
val loss: 3.273: 100%|██████████| 245/245 [01:19<00:00,  3.06it/s]


Epoch 1: Train Loss = 5.559, Train Accuracy = 0.000, Train ROCAUC = 0.539,    Train score = 0.539 | Val Loss = 4.609, Val Accuracy = 0.000,     Val ROCAUC = 0.763, Val score = 0.763


train loss: 4.972: 100%|██████████| 1009/1009 [10:04<00:00,  1.67it/s]
val loss: 2.061: 100%|██████████| 245/245 [01:17<00:00,  3.14it/s]


Epoch 2: Train Loss = 4.440, Train Accuracy = 0.000, Train ROCAUC = 0.797,    Train score = 0.797 | Val Loss = 3.226, Val Accuracy = 0.000,     Val ROCAUC = 0.931, Val score = 0.931


train loss: 5.002: 100%|██████████| 1009/1009 [10:07<00:00,  1.66it/s]
val loss: 2.082: 100%|██████████| 245/245 [01:18<00:00,  3.14it/s]


Epoch 3: Train Loss = 3.648, Train Accuracy = 0.000, Train ROCAUC = 0.902,    Train score = 0.902 | Val Loss = 2.735, Val Accuracy = 0.000,     Val ROCAUC = 0.959, Val score = 0.959


train loss: 4.235: 100%|██████████| 1009/1009 [10:05<00:00,  1.67it/s]
val loss: 2.214: 100%|██████████| 245/245 [01:17<00:00,  3.15it/s]


Epoch 4: Train Loss = 3.175, Train Accuracy = 0.001, Train ROCAUC = 0.940,    Train score = 0.940 | Val Loss = 2.514, Val Accuracy = 0.000,     Val ROCAUC = 0.969, Val score = 0.969


train loss: 4.362: 100%|██████████| 1009/1009 [10:09<00:00,  1.66it/s]
val loss: 2.179: 100%|██████████| 245/245 [01:17<00:00,  3.16it/s]


Epoch 5: Train Loss = 2.838, Train Accuracy = 0.001, Train ROCAUC = 0.959,    Train score = 0.959 | Val Loss = 2.372, Val Accuracy = 0.001,     Val ROCAUC = 0.973, Val score = 0.973


train loss: 4.196: 100%|██████████| 1009/1009 [10:03<00:00,  1.67it/s]
val loss: 2.270: 100%|██████████| 245/245 [01:17<00:00,  3.15it/s]


Epoch 6: Train Loss = 2.567, Train Accuracy = 0.002, Train ROCAUC = 0.968,    Train score = 0.968 | Val Loss = 2.231, Val Accuracy = 0.000,     Val ROCAUC = 0.975, Val score = 0.975


train loss: 3.759: 100%|██████████| 1009/1009 [09:59<00:00,  1.68it/s]
val loss: 2.322: 100%|██████████| 245/245 [01:17<00:00,  3.16it/s]


Epoch 7: Train Loss = 2.381, Train Accuracy = 0.002, Train ROCAUC = 0.975,    Train score = 0.975 | Val Loss = 2.156, Val Accuracy = 0.001,     Val ROCAUC = 0.977, Val score = 0.977


train loss: 3.837: 100%|██████████| 1009/1009 [10:05<00:00,  1.67it/s]
val loss: 2.500: 100%|██████████| 245/245 [01:17<00:00,  3.16it/s]


Epoch 8: Train Loss = 2.277, Train Accuracy = 0.002, Train ROCAUC = 0.977,    Train score = 0.977 | Val Loss = 2.143, Val Accuracy = 0.001,     Val ROCAUC = 0.976, Val score = 0.976


train loss: 3.545: 100%|██████████| 1009/1009 [10:00<00:00,  1.68it/s]
val loss: 2.621: 100%|██████████| 245/245 [01:17<00:00,  3.17it/s]


Epoch 9: Train Loss = 2.210, Train Accuracy = 0.002, Train ROCAUC = 0.980,    Train score = 0.980 | Val Loss = 2.137, Val Accuracy = 0.001,     Val ROCAUC = 0.976, Val score = 0.976
Done in 01h 43min 23s


0,1
train accuracy,▂▁▂▄▅▇▆██
train_auc,▁▅▇▇█████
train_loss,█▆▄▃▂▂▁▁▁
train_score,▁▅▇▇█████
val_accuracy,▁▄▄▃█▅▆▆▆
val_auc,▁▇▇██████
val_loss,█▄▃▂▂▁▁▁▁
val_score,▁▇▇██████

0,1
train accuracy,0.00203
train_auc,0.9803
train_loss,2.21003
train_score,0.9803
val_accuracy,0.00053
val_auc,0.97617
val_loss,2.13691
val_score,0.97617


Training fold 3
Num Train: 20168, 182 classes | Num Valid: 4892, 182 classes


Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.


train loss: 5.005: 100%|██████████| 1009/1009 [10:08<00:00,  1.66it/s]
val loss: 3.805: 100%|██████████| 245/245 [01:09<00:00,  3.52it/s]


Epoch 1: Train Loss = 5.547, Train Accuracy = 0.000, Train ROCAUC = 0.538,    Train score = 0.538 | Val Loss = 4.619, Val Accuracy = 0.000,     Val ROCAUC = 0.749, Val score = 0.749


train loss: 3.988: 100%|██████████| 1009/1009 [10:16<00:00,  1.64it/s]
val loss: 2.201: 100%|██████████| 245/245 [01:09<00:00,  3.53it/s]


Epoch 2: Train Loss = 4.449, Train Accuracy = 0.000, Train ROCAUC = 0.800,    Train score = 0.800 | Val Loss = 3.228, Val Accuracy = 0.000,     Val ROCAUC = 0.918, Val score = 0.918


train loss: 4.210: 100%|██████████| 1009/1009 [10:09<00:00,  1.66it/s]
val loss: 2.458: 100%|██████████| 245/245 [01:09<00:00,  3.53it/s]


Epoch 3: Train Loss = 3.638, Train Accuracy = 0.001, Train ROCAUC = 0.905,    Train score = 0.905 | Val Loss = 2.778, Val Accuracy = 0.000,     Val ROCAUC = 0.949, Val score = 0.949


train loss: 3.278: 100%|██████████| 1009/1009 [10:03<00:00,  1.67it/s]
val loss: 2.362: 100%|██████████| 245/245 [01:09<00:00,  3.52it/s]


Epoch 4: Train Loss = 3.162, Train Accuracy = 0.001, Train ROCAUC = 0.942,    Train score = 0.942 | Val Loss = 2.517, Val Accuracy = 0.000,     Val ROCAUC = 0.962, Val score = 0.962


train loss: 4.203: 100%|██████████| 1009/1009 [10:09<00:00,  1.66it/s]
val loss: 2.081: 100%|██████████| 245/245 [01:09<00:00,  3.53it/s]


Epoch 5: Train Loss = 2.798, Train Accuracy = 0.001, Train ROCAUC = 0.959,    Train score = 0.959 | Val Loss = 2.403, Val Accuracy = 0.001,     Val ROCAUC = 0.961, Val score = 0.961


train loss: 2.554: 100%|██████████| 1009/1009 [10:07<00:00,  1.66it/s]
val loss: 2.046: 100%|██████████| 245/245 [01:09<00:00,  3.51it/s]


Epoch 6: Train Loss = 2.545, Train Accuracy = 0.002, Train ROCAUC = 0.969,    Train score = 0.969 | Val Loss = 2.265, Val Accuracy = 0.001,     Val ROCAUC = 0.964, Val score = 0.964


train loss: 1.743: 100%|██████████| 1009/1009 [10:01<00:00,  1.68it/s]
val loss: 1.964: 100%|██████████| 245/245 [01:09<00:00,  3.53it/s]


Epoch 7: Train Loss = 2.355, Train Accuracy = 0.002, Train ROCAUC = 0.976,    Train score = 0.976 | Val Loss = 2.218, Val Accuracy = 0.001,     Val ROCAUC = 0.963, Val score = 0.963


train loss: 2.537: 100%|██████████| 1009/1009 [10:08<00:00,  1.66it/s]
val loss: 2.078: 100%|██████████| 245/245 [01:09<00:00,  3.52it/s]


Epoch 8: Train Loss = 2.244, Train Accuracy = 0.002, Train ROCAUC = 0.981,    Train score = 0.981 | Val Loss = 2.187, Val Accuracy = 0.001,     Val ROCAUC = 0.963, Val score = 0.963


train loss: 2.006: 100%|██████████| 1009/1009 [10:13<00:00,  1.65it/s]
val loss: 2.050: 100%|██████████| 245/245 [01:09<00:00,  3.52it/s]


Epoch 9: Train Loss = 2.194, Train Accuracy = 0.002, Train ROCAUC = 0.982,    Train score = 0.982 | Val Loss = 2.193, Val Accuracy = 0.001,     Val ROCAUC = 0.964, Val score = 0.964
Done in 01h 42min 37s


0,1
train accuracy,▁▂▃▄▆▇█▇█
train_auc,▁▅▇▇█████
train_loss,█▆▄▃▂▂▁▁▁
train_score,▁▅▇▇█████
val_accuracy,▁▃▄▄▅██▇▇
val_auc,▁▇███████
val_loss,█▄▃▂▂▁▁▁▁
val_score,▁▇███████

0,1
train accuracy,0.00189
train_auc,0.98162
train_loss,2.19359
train_score,0.98162
val_accuracy,0.00088
val_auc,0.96356
val_loss,2.19274
val_score,0.96356


Training fold 4
Num Train: 20168, 182 classes | Num Valid: 4891, 182 classes


Unexpected keys (bn2.bias, bn2.num_batches_tracked, bn2.running_mean, bn2.running_var, bn2.weight, classifier.bias, classifier.weight, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.


train loss: 5.248: 100%|██████████| 1009/1009 [10:06<00:00,  1.66it/s]
val loss: 3.353: 100%|██████████| 245/245 [01:19<00:00,  3.09it/s]


Epoch 1: Train Loss = 5.567, Train Accuracy = 0.000, Train ROCAUC = 0.528,    Train score = 0.528 | Val Loss = 4.691, Val Accuracy = 0.000,     Val ROCAUC = 0.734, Val score = 0.734


train loss: 3.827: 100%|██████████| 1009/1009 [10:04<00:00,  1.67it/s]
val loss: 2.248: 100%|██████████| 245/245 [01:19<00:00,  3.08it/s]


Epoch 2: Train Loss = 4.493, Train Accuracy = 0.000, Train ROCAUC = 0.796,    Train score = 0.796 | Val Loss = 3.332, Val Accuracy = 0.000,     Val ROCAUC = 0.911, Val score = 0.911


train loss: 3.277: 100%|██████████| 1009/1009 [10:07<00:00,  1.66it/s]
val loss: 2.205: 100%|██████████| 245/245 [01:19<00:00,  3.08it/s]


Epoch 3: Train Loss = 3.673, Train Accuracy = 0.001, Train ROCAUC = 0.905,    Train score = 0.905 | Val Loss = 2.819, Val Accuracy = 0.000,     Val ROCAUC = 0.944, Val score = 0.944


train loss: 3.455: 100%|██████████| 1009/1009 [10:06<00:00,  1.66it/s]
val loss: 2.297: 100%|██████████| 245/245 [01:19<00:00,  3.07it/s]


Epoch 4: Train Loss = 3.171, Train Accuracy = 0.001, Train ROCAUC = 0.941,    Train score = 0.941 | Val Loss = 2.575, Val Accuracy = 0.000,     Val ROCAUC = 0.957, Val score = 0.957


train loss: 2.270: 100%|██████████| 1009/1009 [10:05<00:00,  1.67it/s]
val loss: 2.383: 100%|██████████| 245/245 [01:19<00:00,  3.08it/s]


Epoch 5: Train Loss = 2.826, Train Accuracy = 0.001, Train ROCAUC = 0.959,    Train score = 0.959 | Val Loss = 2.472, Val Accuracy = 0.000,     Val ROCAUC = 0.957, Val score = 0.957


train loss: 1.837: 100%|██████████| 1009/1009 [10:09<00:00,  1.66it/s]
val loss: 1.986: 100%|██████████| 245/245 [01:19<00:00,  3.07it/s]


Epoch 6: Train Loss = 2.586, Train Accuracy = 0.001, Train ROCAUC = 0.968,    Train score = 0.968 | Val Loss = 2.322, Val Accuracy = 0.000,     Val ROCAUC = 0.960, Val score = 0.960


train loss: 3.039: 100%|██████████| 1009/1009 [10:06<00:00,  1.66it/s]
val loss: 2.000: 100%|██████████| 245/245 [01:19<00:00,  3.08it/s]


Epoch 7: Train Loss = 2.385, Train Accuracy = 0.001, Train ROCAUC = 0.976,    Train score = 0.976 | Val Loss = 2.270, Val Accuracy = 0.000,     Val ROCAUC = 0.956, Val score = 0.956


train loss: 1.536: 100%|██████████| 1009/1009 [10:05<00:00,  1.67it/s]
val loss: 2.110: 100%|██████████| 245/245 [01:19<00:00,  3.07it/s]


Epoch 8: Train Loss = 2.277, Train Accuracy = 0.002, Train ROCAUC = 0.978,    Train score = 0.978 | Val Loss = 2.243, Val Accuracy = 0.000,     Val ROCAUC = 0.959, Val score = 0.959


train loss: 2.990: 100%|██████████| 1009/1009 [10:07<00:00,  1.66it/s]
val loss: 2.096: 100%|██████████| 245/245 [01:19<00:00,  3.07it/s]


Epoch 9: Train Loss = 2.214, Train Accuracy = 0.002, Train ROCAUC = 0.981,    Train score = 0.981 | Val Loss = 2.229, Val Accuracy = 0.000,     Val ROCAUC = 0.961, Val score = 0.961
Done in 01h 43min 50s


0,1
train accuracy,▁▁▄▃▅▆▆▇█
train_auc,▁▅▇▇█████
train_loss,█▆▄▃▂▂▁▁▁
train_score,▁▅▇▇█████
val_accuracy,▁▃▃▄▄▅█▆▆
val_auc,▁▆▇██████
val_loss,█▄▃▂▂▁▁▁▁
val_score,▁▆▇██████

0,1
train accuracy,0.0019
train_auc,0.98057
train_loss,2.21388
train_score,0.98057
val_accuracy,0.00035
val_auc,0.96081
val_loss,2.22909
val_score,0.96081


# Optimize for inference

In [8]:
import openvino as ov
import nncf

INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, openvino


In [8]:
# Load checkpoint
device = torch.device('cpu')

model = Config.Model(Config.n_classes, pretrained=False, model_name=Config.model_name, n_mels=Config.n_mels).to(device)

#save_dir = f"{Config.base_dir}checkpoints/{Config.run_name}"
save_dir = 'checkpoints/2024-05-25_01-03-08_256x256_tf_efficientnetv2_s_fold-4'
checkpoint_name = f'{save_dir}/checkpoint.pth'
checkpoint_ov = f'{save_dir}/checkpoint.xml'
    
checkpoint = torch.load(checkpoint_name, map_location='cpu')
model.load_state_dict(checkpoint['model'])
model.eval()

SEDClassifier(
  (bn0): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (encoder): EfficientNetFeatures(
    (conv_stem): Conv2dSame(3, 24, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNormAct2d(
      24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (blocks): Sequential(
      (0): Sequential(
        (0): ConvBnAct(
          (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (drop_path): Identity()
        )
        (1): ConvBnAct(
          (conv): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNormAct2d(
            24, eps=0.001, momentum=0.1, affine=True, tr

In [9]:
# Convert and save model for openvino
input_data = torch.rand(1, 3, Config.n_mels, Config.target_length)
ov_model = ov.convert_model(model, example_input=input_data)
ov.save_model(ov_model, save_dir + '/checkpoint.xml')

In [13]:
# Read and compile model with openvino
core = ov.Core()
ov_model = core.read_model(save_dir + "/checkpoint.xml")
compiled_model = ov.compile_model(ov_model)

In [12]:
# Quantize model to 8 bits with OpenVINO.
# TODO: the quantization step itself is not written yet; this cell currently only
# displays `val_loader`, the validation DataLoader left over from the training cell.

val_loader

<torch.utils.data.dataloader.DataLoader at 0x770f57433e50>

In [11]:
# Scratch tensors of zeros with shape (8, 182); 182 equals Config.n_classes.
# NOTE(review): `input` shadows the Python builtin of the same name for the rest of the session.
target = torch.zeros((8,182))
input = torch.zeros((8,182))