In [1]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.data import DataLoader

from src.data import AudioDatasetInference
from src.models import BasicClassifier
from src.utils import score
from src.data_utils import get_fold, get_metadata_from_csv

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
class Config:
    duration = 10
    sample_rate = 32000
    target_length = 384
    n_mels = 128
    n_fft = 2028
    window = 2028
    audio_len = duration*sample_rate
    hop_length = audio_len // (target_length-1)
    fmin = 20
    fmax = 16000
    top_db = 80

    n_classes = 182
    model_name = 'efficientnet_v2_s'
    checkpoint = 'checkpoints/2024-05-05_17-31-54_fold-0_dim-128x384_model-efficientnet_v2_s/checkpoint.pth'

    standardize = False
    dataset_mean = [-16.8828]
    dataset_std = [12.4019]

    loss = 'bce'

In [12]:
base_dir = 'data'
train_dir = base_dir + '/train_audio/'
test_dir = base_dir + '/test_soundscapes/'
unlabeled_dir = base_dir + '/unlabeled_soundscapes/'

class_names = sorted(os.listdir(train_dir))
n_classes = len(class_names)
class_labels = list(range(n_classes))
label2name = dict(zip(class_labels, class_names))
name2label = {v:k for k,v in label2name.items()}

metadata = get_metadata_from_csv('metadata.csv')

In [13]:
fold = 0
train_df, valid_df, class_weights = get_fold(metadata, fold)
files = valid_df['filepath']
targets = valid_df['target']

Num Train: 19567, 182 classes | Num Valid: 4892, 182 classes


In [14]:
test_dataset = AudioDatasetInference(
    files, 
    targets=None, 
    n_classes=Config.n_classes,
    duration=5,
    sample_rate=Config.sample_rate,
    target_length=Config.target_length,
    n_mels=Config.n_mels,
    n_fft=Config.n_fft,
    window=Config.window,
    hop_length=Config.hop_length,
    fmin=Config.fmin,
    fmax=Config.fmax,
    top_db=Config.top_db,
    standardize=Config.standardize,
    mean=Config.dataset_mean,
    std=Config.dataset_std
    )

In [15]:
device = torch.device('cuda')

model = BasicClassifier(n_classes, pretrained=False, model_name=Config.model_name).to(device)
checkpoint_name = Config.checkpoint
checkpoint = torch.load(checkpoint_name, map_location='cpu')
model.load_state_dict(checkpoint['model'])
model = torch.jit.optimize_for_inference(torch.jit.script(model.eval()))

In [16]:
ids = []
preds = np.empty(shape=(0, n_classes), dtype='float32')

test_iter = tqdm(range(len(test_dataset)))
for i in test_iter:
    specs, file = test_dataset[i]
    filename = file.split('/')[-1][:-4]
    specs = specs.to(device)
    
    with torch.no_grad():
        outs = model(specs)
        if Config.loss == 'crossentropy':
            outs = nn.functional.softmax(outs, dim=1).detach().cpu().numpy()
        elif Config.loss == 'bce':
            outs = outs.sigmoid().detach().cpu().numpy()

    frame_ids = [f'{filename}_{(frame_id+1)*5}' for frame_id in range(len(specs))]
    ids += frame_ids

    preds = np.concatenate([preds, outs], axis=0)

100%|██████████| 4892/4892 [07:37<00:00, 10.70it/s] 


In [17]:
pred_df = pd.DataFrame(ids, columns=['row_id'])
pred_df.loc[:, class_names] = preds

def get_target(row_id):
    name = row_id.split('_')[0]
    target = valid_df.loc[valid_df['filepath'].map(lambda x: x.split('/')[-1][:-4]) == name, 'target'].values[0]
    return target

pred_df['target'] = pred_df.row_id.map(get_target)
submission = pred_df[class_names]

solution = pd.DataFrame()
for class_name in class_names:
    # Create a new column where the value is 1 if the label matches the class index, and 0 otherwise
    solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)

  pred_df['target'] = pred_df.row_id.map(get_target)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df['target'] == name2label[class_name]).astype(int)
  solution[class_name] = (pred_df[

In [18]:
score(solution, submission, 'row_id')

0.6495929556132565

In [None]:
0.6599837509627534