In [None]:
# Install the noise-reduction package that create_spectrogram calls (no version is pinned).
!pip install -q noisereduce
# Root folder of the competition data; every input path in this notebook is built from it.
PATH_DATASET = "/kaggle/input/birdclef-2022"

In [None]:
import os
import pandas as pd

# Load the training metadata and shuffle the rows for a quick look at a random sample.
# The shuffle is seeded so that "Restart & Run All" shows the same preview every time.
path_csv = os.path.join(PATH_DATASET, "train_metadata.csv")
train_meta = pd.read_csv(path_csv).sample(frac=1, random_state=42)
display(train_meta.head())

In [None]:
# Number of CPUs reported by the OS; later cells pass the same value to joblib as n_jobs.
os.cpu_count()

In [None]:
import torchaudio
from tqdm.auto import tqdm
from joblib import Parallel, delayed


# Function for slicing audio

In [None]:
import torch
import noisereduce as nr
from torch.utils.data import DataLoader
from math import ceil

# STFT settings shared by every spectrogram computed in this notebook
device = "cpu"
n_fft = 1024       # FFT size -> n_fft // 2 + 1 frequency bins
win_length = 1024  # analysis window length, in samples
hop_length = 512   # samples between the starts of consecutive windows
transform = torchaudio.transforms.Spectrogram(
    n_fft=n_fft,
    win_length=win_length,
    hop_length=hop_length,
    center=True,
    pad_mode='reflect',
    power=2.0,
).to(device)

def _split_into_frames(signal, size, step, min_tail_fraction=0.33):
    """Cut a 1-D signal into equally sized frames of `size` samples.

    * Empty signal: no frames.
    * Signal no longer than one frame: a single frame made by repeating the
      signal until it fills `size` samples.
    * Otherwise: every full frame starting at multiples of `step`, plus one
      extra frame holding the LAST `size` samples when the uncovered tail is
      at least `min_tail_fraction` of a frame (shorter tails are dropped).
    """
    n_samples = signal.shape[-1]
    if n_samples == 0:
        return []
    if n_samples <= size:
        repeats = ceil(size / n_samples)
        return [signal.repeat(repeats)[:size]]

    n_full = (n_samples - size) // step + 1
    frames = [signal[k * step:k * step + size] for k in range(n_full)]

    # Samples that the next (incomplete) frame would contain; always < size.
    leftover = n_samples - n_full * step
    if leftover >= size * min_tail_fraction:
        # Re-use the end of the recording so the frame is full length.
        frames.append(signal[-size:])
    return frames


@torch.no_grad()
def create_spectrogram(
    fname: str,
    reduce_noise: bool = False,
    frame_size: int = 5,
    frame_step: int = 5,
    channel: int = 0,
    device = "cpu",
    batch_size=5,
):
    """Load an audio file and return the log-power spectrogram of each frame.

    The recording is mixed down to mono, optionally denoised, cut into frames
    of `frame_size` seconds every `frame_step` seconds and passed through the
    module-level `transform`.

    Fixes compared with the previous version:
    * the first frame was appended a second time for every recording longer
      than one frame (a `for ... else` without `break` always runs its `else`);
    * the last full frame and the tail of the recording were never visited;
    * an empty recording raised ZeroDivisionError instead of yielding no frames.

    Args:
        fname: path of the audio file, passed to `torchaudio.load`.
        reduce_noise: run `noisereduce.reduce_noise` on the waveform first.
        frame_size: frame length in seconds.
        frame_step: distance between frame starts in seconds.
        channel: row of the (possibly down-mixed) waveform to use.
        device: unused; kept so existing callers keep working.
        batch_size: unused; kept so existing callers keep working.

    Returns:
        A float16 tensor with one spectrogram per frame along the first
        dimension, or an empty `torch.Tensor()` when there is nothing to frame.

    Raises:
        ValueError: if the frame size or step is not at least one sample.
    """
    waveform, sample_rate = torchaudio.load(fname)

    # Mix multi-channel audio down to mono.
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    if reduce_noise:
        # noisereduce documents numpy input, so hand it an ndarray explicitly.
        waveform = torch.tensor(nr.reduce_noise(
            y=waveform.numpy(),
            sr=sample_rate,
            win_length=transform.win_length,
            use_tqdm=False,
            n_jobs=2,
        ))

    step = int(frame_step * sample_rate)
    size = int(frame_size * sample_rate)
    if size <= 0 or step <= 0:
        raise ValueError("frame_size and frame_step must cover at least one sample")

    frames = _split_into_frames(waveform[channel], size, step)
    if not frames:
        return torch.Tensor()

    # log(0) gives -inf for silent bins; nan_to_num turns NaN/inf into finite float16 values.
    return torch.nan_to_num(torch.log(transform(torch.stack(frames))).to(torch.float16))

# Smoke test: run the pipeline on the recording whose index label is 5
# and print the value range of the resulting spectrograms.
sample_filename = train_meta["filename"][5]
path_audio = os.path.join(PATH_DATASET, "train_audio", sample_filename)
print(path_audio)

segSpec = create_spectrogram(path_audio, reduce_noise=True)

print(segSpec.min(), segSpec.max())

In [None]:
# value in -100~20
# for i in range(100):
#     segSpec = create_spectrogram(train_meta["filename"][i], reduce_noise=True)
#     if segSpec.shape[0]!=0:
#         print(segSpec.max(),segSpec.min())

In [None]:
import numpy as np
def save_seg_spectrum(segSpec,birdName,fileName):
    """Save each segment of `segSpec` as its own .npy file under ./train_np.

    Segment i of `<dir>/<stem>.<ext>` is written to
    `./train_np/<dir>/<stem>_<i>.npy`. The extension is stripped with
    `os.path.splitext`, so files that are not `.ogg` no longer make every
    segment overwrite the same output file, and an '.ogg' occurring elsewhere
    in the path is left alone. The folder that actually receives the files is
    created even if it differs from `birdName`.

    Args:
        segSpec: array-like whose first dimension enumerates the segments.
        birdName: species label; `./train_np/<birdName>` is created as before.
        fileName: path of the source recording relative to the audio root.

    Returns:
        `(fileName, number_of_segments)`; nothing is written when there are
        no segments.
    """
    n_segments = segSpec.shape[0]
    if n_segments != 0:
        # Kept from the original behaviour: the per-species folder always exists after a save.
        os.makedirs(os.path.join('./', "train_np", birdName), exist_ok=True)

        stem, _ext = os.path.splitext(os.path.join('./', "train_np", fileName))
        os.makedirs(os.path.dirname(stem), exist_ok=True)

        for i, data in enumerate(segSpec):
            np.save(f"{stem}_{i}.npy", data)
    return fileName, n_segments
        
# save_seg_spectrum(segSpec,train_meta["primary_label"][2],train_meta["filename"][2])

In [None]:
def read_Export(index,birdName,fileName):
    """Turn one recording into denoised spectrogram segments and save them.

    `index` is accepted but not used. The result of `save_seg_spectrum`
    is returned unchanged.
    """
    source_path = os.path.join(PATH_DATASET, "train_audio", fileName)
    return save_seg_spectrum(
        create_spectrogram(source_path, reduce_noise=True),
        birdName,
        fileName,
    )


# read_Export(0,train_meta["primary_label"][0],train_meta["filename"][0])

# Filter data

In [None]:
# Reload the metadata in file order, replacing the shuffled copy created earlier.
train_meta = pd.read_csv(path_csv)
train_meta

In [None]:
def getLength(fn):
    """Return the number of sample frames torchaudio.info reports for train_audio/<fn>."""
    fp = os.path.join(PATH_DATASET, "train_audio", fn)
    metadata = torchaudio.info(fp)
    return metadata.num_frames


def _get_duration_seconds(fn):
    """Return the duration of train_audio/<fn> in seconds.

    Uses the sample rate reported for the file itself instead of assuming
    32 kHz, so a recording stored at another rate still gets the right duration.
    """
    metadata = torchaudio.info(os.path.join(PATH_DATASET, "train_audio", fn))
    return metadata.num_frames / metadata.sample_rate


# Only the file info is queried (one job per file, spread over all CPUs).
train_meta['duration'] = Parallel(n_jobs=os.cpu_count())(
    delayed(_get_duration_seconds)(fn) for fn in tqdm(train_meta["filename"])
)
train_meta

## Delete low-rated data

In [None]:
# Keep only the recordings whose rating is strictly greater than 2.
train_meta_filter1 = train_meta[train_meta['rating']>2]
train_meta_filter1.shape

## Delete recordings that are too long (their species already have enough data)

In [None]:
# These recordings are too long (duration above 800), and their species already have enough audio.
train_meta_filter1[train_meta_filter1['duration']>800]

In [None]:
# Keep recordings with a duration below 800 (a duration of exactly 800 is dropped too).
train_meta_filter2 = train_meta_filter1[train_meta_filter1['duration']<800]
train_meta_filter2.shape

In [None]:
# Write the grouped data to a temporary file so it is easier to inspect
# train_meta_filter2[['primary_label','duration']].groupby('primary_label').sum().to_csv('temp.csv')


In [None]:
# Total duration of the remaining recordings per species (primary_label).
groupDuration = train_meta_filter2[['primary_label','duration']].groupby('primary_label').sum()
# groupDuration[groupDuration['duration']>3000]
groupDuration

In [None]:
# Summary statistics of the per-species total durations.
groupDuration['duration'].describe()

## Delete excess data from over-represented species

In [None]:
import random

# Cap every species at MAX_SECONDS_PER_SPECIES of audio by dropping randomly
# chosen recordings. Whole recordings are removed until at least the surplus
# is gone, so a capped species can end up somewhat below the limit.
MAX_SECONDS_PER_SPECIES = 500

# Private, seeded generator: the same recordings are dropped on every run,
# which keeps the exported dataset and its metadata reproducible.
drop_rng = random.Random(42)

# Surplus duration of each species that is above the cap.
deleteData = groupDuration[groupDuration['duration']>MAX_SECONDS_PER_SPECIES]-MAX_SECONDS_PER_SPECIES

rows_to_drop = []
for species, surplus in deleteData['duration'].items():
    candidates = train_meta_filter2.index[train_meta_filter2['primary_label']==species].tolist()
    # A shuffled order is equivalent to repeated random draws without replacement.
    drop_rng.shuffle(candidates)
    removed = 0
    for row_label in candidates:
        if removed >= surplus:
            break
        # .at needs unique index labels — assumed here because the frame is a
        # filtered view of a freshly read CSV with its default index.
        removed += train_meta_filter2.at[row_label, 'duration']
        rows_to_drop.append(row_label)

# One drop for all species; train_meta_filter2 itself is left untouched.
train_meta_filter3 = train_meta_filter2.drop(index=rows_to_drop)

In [None]:
# Per-species total duration after the over-represented species were trimmed.
groupDuration3= train_meta_filter3[['primary_label','duration']].groupby('primary_label').sum()
# groupDuration[groupDuration['duration']>3000]
groupDuration3.describe()

In [None]:
# Total duration left across all species.
groupDuration3.sum().item()

In [None]:
# Rough estimate: (total duration / 5) segments, times 321, divided by 1024*1024.
# NOTE(review): 321 looks like the approximate size of one saved segment in KB,
# which would make the result an output size in GB — confirm.
((groupDuration3.sum().item()/5)*321)/(1024*1024)

# Get Data

In [None]:
# Renumber the surviving rows 0..n-1, discarding the old index labels.
train_meta_final = train_meta_filter3.reset_index(drop=True)
train_meta_final

In [None]:
from tqdm.notebook import tqdm
# Export the spectrogram segments of every remaining recording, one joblib task per row.
export_tasks = (
    delayed(read_Export)(index, row.primary_label, row.filename)
    for index, row in tqdm(train_meta_final.iterrows(), total=train_meta_final.shape[0])
)
SegInfo = Parallel(n_jobs=os.cpu_count(), verbose=0)(export_tasks)

# Get metadata

In [None]:
# Attach the number of exported segments to each recording and write the result to disk.
segment_counts = pd.DataFrame(SegInfo, columns=['filename', 'segment_num'])
metadata_with_counts = pd.merge(train_meta_final, segment_counts, how='outer', on='filename')
metadata_with_counts.to_csv('filter_metadata.csv')

In [None]:
import os
import shutil
from IPython.display import FileLink

# No visible cell creates the archive this link points to, so build it from the
# exported ./train_np folder when it is missing (an existing archive is reused).
ARCHIVE_NAME = 'train_np.tar.gz'
if not os.path.exists(ARCHIVE_NAME) and os.path.isdir('train_np'):
    shutil.make_archive('train_np', 'gztar', root_dir='.', base_dir='train_np')
FileLink(ARCHIVE_NAME)

In [None]:
# Same download link as the previous cell (this cell is a duplicate of it).
from IPython.display import FileLink
FileLink(r'train_np.tar.gz')