In [None]:
import os
from pathlib import Path
from types import SimpleNamespace

# Central run configuration; later cells read these values as attributes of `cfg`.
cfg = SimpleNamespace(
    num_folds=5,
    gpu="7",
    seed=2024,
    input_path=Path('../input'),
)

# Derived and output locations.
cfg.comp_data_path = cfg.input_path / 'birdclef-2024'
cfg.save_path = Path('../checkpoints')

cfg.logger_file = True

# Target sample rate handed to librosa when audio is loaded.
cfg.sr = 32000

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from tqdm import tqdm
from logging import getLogger, INFO, StreamHandler, FileHandler, Formatter
import gc
import pickle as pkl

import librosa

from torch.utils.data import DataLoader, Dataset
import torchaudio
import torchaudio.transforms as T
#import torch_audiomentations as tA

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import autocast, GradScaler
from torch.optim import lr_scheduler, Adam, AdamW

import timm

from glob import glob

In [None]:
# Prefer the GPU, but fall back to CPU so the notebook still runs on machines
# without CUDA (constructing torch.device('cuda') does not check availability).
cfg.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Competition metadata table; its `filename` column drives the export loop further down.
train = pd.read_csv(cfg.comp_data_path.joinpath('train_metadata.csv'))
train

In [None]:
def load_audio(filename, cfg):
    """Load one training recording as a float32 waveform.

    Args:
        filename: Path of the recording relative to the ``train_audio``
            directory under ``cfg.comp_data_path``.
        cfg: Config namespace; ``comp_data_path`` and ``sr`` are read.

    Returns:
        The samples returned by ``librosa.load`` (called with ``sr=cfg.sr``),
        cast to ``np.float32``.
    """
    source = cfg.comp_data_path / 'train_audio' / filename
    # librosa.load returns (samples, sample_rate); only the samples are kept.
    samples, _ = librosa.load(source, sr=cfg.sr)
    return samples.astype(np.float32)

In [None]:
# Smoke test: decode a single recording to confirm the loader works.
load_audio(filename='asbfly/XC134896.ogg', cfg=cfg)

In [None]:
# Create the output root in Python rather than via `! mkdir`, so re-running the
# cell is a no-op instead of a "File exists" error, and missing parents are created.
(cfg.input_path / 'birdclef_data').mkdir(parents=True, exist_ok=True)

In [None]:
# Export the first and last 10 seconds of every training recording as .npy
# files, grouped by the leading folder of each filename, and record each
# recording's full length (in samples) in `lengths`.
dirnames = []
lengths = []
clip_samples = 10 * cfg.sr  # number of samples in a 10-second clip
for fname in tqdm(train.filename):
    audio = load_audio(fname, cfg)
    file = fname.split('/')[-1].split('.')[0]
    dirname = fname.split('/')[0]
    save_path = cfg.input_path / 'birdclef_data' / dirname
    # np.save does not create directories, so make each output folder the
    # first time it is seen; exist_ok keeps re-runs from failing.
    if dirname not in dirnames:
        save_path.mkdir(parents=True, exist_ok=True)
        dirnames.append(dirname)
    # For recordings shorter than 10 s both slices contain the whole waveform.
    np.save(save_path / ('first10_' + file), audio[:clip_samples])
    np.save(save_path / ('last10_' + file), audio[-clip_samples:])
    lengths.append(audio.shape[0])

In [None]:
# Distribution of recording durations in seconds, with log-scaled counts.
fig, ax = plt.subplots()
ax.hist(np.array(lengths) / cfg.sr, bins=100, log=True)
ax.set(title='Training recording durations', xlabel='Duration (s)', ylabel='Number of recordings (log scale)');

In [None]:
# Create the soundscape output folder in Python rather than via `! mkdir`, so
# the cell is safe to re-run and does not depend on the parent already existing.
(cfg.input_path / 'birdclef_data' / 'unlabeled_soundscapes').mkdir(parents=True, exist_ok=True)

In [None]:
def load_soundscape(pathname, cfg):
    """Load a soundscape recording as a float32 waveform.

    Args:
        pathname: Path to the audio file, passed straight to ``librosa.load``.
        cfg: Config namespace; ``sr`` is read as the target sample rate.

    Returns:
        The samples returned by ``librosa.load``, cast to ``np.float32``.
    """
    # Take the rate from cfg instead of a hard-coded 32000, so soundscapes
    # always use the same sample rate as load_audio.
    audio = librosa.load(pathname, sr=cfg.sr)[0].astype(np.float32)
    return audio

In [None]:
# Convert every unlabeled soundscape to a .npy waveform named after the source file.
savepath = cfg.input_path / 'birdclef_data' / 'unlabeled_soundscapes'
# np.save needs the target directory to exist; creating it here keeps the cell
# runnable on its own and safe to re-run.
savepath.mkdir(parents=True, exist_ok=True)
# glob returns files in arbitrary order; sort for a reproducible processing order.
for pathname in tqdm(sorted(glob(str(cfg.comp_data_path / 'unlabeled_soundscapes/*.ogg')))):
    # Path(...).name honours the platform's separator, unlike splitting on '/'.
    filename = Path(pathname).name.split('.')[0]
    waveform = load_soundscape(pathname, cfg)
    np.save(savepath / filename, waveform)


In [None]:
# Show the metadata rows whose primary label is 'integr'.
train.loc[train['primary_label'] == 'integr']

In [None]:
# Show the metadata rows whose primary label is 'blaeag1'.
train.loc[train['primary_label'] == 'blaeag1']