In [1]:
import gc
import json
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy
import pandas
import requests
from IPython.display import clear_output
from pydub import AudioSegment
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
# Root folder of the project; the empty string makes every derived path
# relative to the current working directory.
parent_path = ''
# data/ holds the metadata CSV and the JSON lookup tables,
# audio_npz/ receives the compressed spectrogram archives.
data_path, npz_path = (
    os.path.join(parent_path, folder) for folder in ('data', 'audio_npz')
)

In [3]:
# Load the recording metadata; the first CSV column is used as the row index.
birds = pandas.read_csv(
    filepath_or_buffer=os.path.join(data_path, 'birds_dataset.csv'),
    index_col=0,
)

In [4]:
# Preview the first five rows of the metadata table.
birds.head(n=5)

Unnamed: 0,index,file_id,genus,species,name,download_url,file_name,duration,other_species_in_recording,quality
0,8,452850,Crypturellus,cinereus,Cinereous Tinamou,https://www.xeno-canto.org/452850/download,"XC452850-Cinereous Tinamou, song, 190117-023.mp3",53,"Poecilotriccus latirostris, Pheugopedius genib...",A
1,9,446610,Crypturellus,cinereus,Cinereous Tinamou,https://www.xeno-canto.org/446610/download,XC446610-Crypturellus cinereus - DaRec_2018052...,32,,A
2,10,424163,Crypturellus,cinereus,Cinereous Tinamou,https://www.xeno-canto.org/424163/download,XC424163-Crypturellus cinereus. RO. ME- Felipe...,36,"Sclerurus obscurior, Amazona farinosa",A
3,11,333322,Crypturellus,cinereus,Cinereous Tinamou,https://www.xeno-canto.org/333322/download,XC333322-CRYCIN13.mp3,51,"Thamnophilus aethiops, Hypocnemis ochrogyna, M...",A
4,12,329091,Crypturellus,cinereus,Cinereous Tinamou,https://www.xeno-canto.org/329091/download,XC329091-Crypturellus cinereus(song)XapuriAC23...,37,,A


In [5]:
# Class ids are assigned in alphabetical order of the genus names, which keeps
# the genus <-> id mapping stable across runs on the same dataset.
genus_list = birds['genus'].unique().tolist()
genus_list.sort()
n_classes = len(genus_list)
# Forward (id -> genus) and reverse (genus -> id) lookup tables.
id_to_genus = dict(enumerate(genus_list))
genus_to_id = {genus: class_id for class_id, genus in enumerate(genus_list)}
print('Number of unique Genera: {}'.format(n_classes))

Number of unique Genera: 182


In [6]:
# Persist both lookup tables next to the dataset so that later stages can
# decode class ids without recomputing them. (`json` is imported together with
# the other modules in the first cell.)
# NOTE: JSON object keys are always strings, so the integer keys of
# id_to_genus are written as "0", "1", ...; convert them back with int()
# after json.load.
with open(os.path.join(data_path, 'id_to_genus.json'), 'w') as f:
    json.dump(id_to_genus, f)
with open(os.path.join(data_path, 'genus_to_id.json'), 'w') as f:
    json.dump(genus_to_id, f)

In [7]:
# Inputs are the download URLs, targets are the genus names.
data_X = birds['download_url']
data_y = birds['genus']
# 80/20 split, stratified on genus, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    data_X,
    data_y,
    test_size=0.2,
    stratify=data_y,
    random_state=100,
)
# Every genus has to be present on both sides of the split.
assert y_train.nunique(dropna=False) == n_classes
assert y_test.nunique(dropna=False) == n_classes

In [8]:
# Cut the training split into six consecutive, non-overlapping chunks so that
# each one can be downloaded and saved on its own.
# Every boundary is a multiple of 1796 rows, so chunks 1-5 have the same size
# and chunk 6 takes whatever remains. `.iloc` makes the positional slicing
# explicit (the Series keep the shuffled original row labels as their index).
X_train_1, y_train_1 = X_train.iloc[:1796], y_train.iloc[:1796]
X_train_2, y_train_2 = X_train.iloc[1796:3592], y_train.iloc[1796:3592]
X_train_3, y_train_3 = X_train.iloc[3592:5388], y_train.iloc[3592:5388]
X_train_4, y_train_4 = X_train.iloc[5388:7184], y_train.iloc[5388:7184]
X_train_5, y_train_5 = X_train.iloc[7184:8980], y_train.iloc[7184:8980]
X_train_6, y_train_6 = X_train.iloc[8980:], y_train.iloc[8980:]

In [9]:
# Remove the names of the un-chunked training split and run a
# garbage-collection pass (the cell displays the value returned by collect()).
del X_train, y_train
gc.collect()

312

In [10]:
def transform(x):
    """Convert decibel values to natural-log power.

    ``x`` is first mapped back to power with ``librosa.core.db_to_power``
    (reference power 1.0); the natural logarithm of that result is returned.
    """
    return numpy.log(librosa.core.db_to_power(x, ref=1.0))

In [11]:
def convert(wav_audio_path):
    """Return the dB-scaled mel spectrogram of an audio file, transposed.

    The file is read with ``librosa.load`` (library defaults), turned into a
    mel power spectrogram with a 2048-sample FFT window and a hop of 512
    samples, converted to decibels relative to its own maximum, and finally
    transposed before being returned.
    """
    samples, sample_rate = librosa.load(wav_audio_path)
    mel_power = librosa.feature.melspectrogram(
        y=samples, sr=sample_rate, n_fft=2048, hop_length=512
    )
    # ref=numpy.max scales every value relative to the strongest bin of this clip.
    mel_db = librosa.power_to_db(mel_power, ref=numpy.max)
    return mel_db.T

In [12]:
def get_audio(url, timeout=60):
    """Download one recording and convert its first 20 s to a spectrogram.

    The MP3 behind ``url`` is saved to a temporary file under ``parent_path``,
    cut to its first 20000 ms, exported as WAV and passed to ``convert``.
    Both temporary files are removed before returning, whether or not the
    processing succeeded.

    Parameters
    ----------
    url : str
        Download URL; its fourth ``/``-separated component (the recording id
        in ``https://www.xeno-canto.org/<id>/download``) names the temp files.
    timeout : float, optional
        Seconds to wait for the server before giving up, so that a stalled
        connection cannot block a long batch run forever.

    Returns
    -------
    (bool, numpy.ndarray or None)
        ``(True, spectrogram)`` when the spectrogram has exactly 862 rows
        (what a full 20 s clip yields at librosa's default 22050 Hz sample
        rate with hop_length=512); ``(False, None)`` for shorter clips or
        when any step fails. Failures are printed, never raised.
    """
    temp_paths = []
    try:
        file_name = url.split('/')[3]
        mp3_path = os.path.join(parent_path, file_name + '.mp3')
        wav_path = os.path.join(parent_path, file_name + '.wav')
        temp_paths = [wav_path, mp3_path]

        r = requests.get(url, allow_redirects=True, timeout=timeout)
        # Fail early on HTTP errors instead of handing an error page to ffmpeg.
        r.raise_for_status()
        with open(mp3_path, 'wb') as op:
            op.write(r.content)

        sound = AudioSegment.from_mp3(mp3_path)[:20000]
        sound.export(wav_path, format="wav")
        audio = convert(wav_path)

        if audio.shape[0] == 862:
            return True, audio
        return False, None
    except Exception as e:
        # Best effort by design: one bad recording must not abort the batch.
        print(e)
        return False, None
    finally:
        # Always remove the temp files, including after a failed decode.
        for path in temp_paths:
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(cleanup_error)

In [13]:
def save_npz(data_x, data_y, save_path):
    """Download a chunk of recordings and store it as a compressed .npz file.

    Parameters
    ----------
    data_x : mapping with ``.items()`` (e.g. pandas.Series)
        Row label -> download URL.
    data_y : indexable by the labels of ``data_x``
        Row label -> genus name (must be a key of ``genus_to_id``).
    save_path : str or path-like or file object
        Destination handed to ``numpy.savez_compressed``. For a path, the
        parent directory is created up front so that a missing folder is
        detected before the long download loop rather than after it.

    The archive contains two float64 arrays. Despite their names they are the
    inputs and the labels of the same chunk (key names kept unchanged so that
    previously written archives and new ones stay interchangeable):

    * ``train`` - spectrograms, shape ``(n, 862, 128)``
    * ``test``  - one-hot genus labels, shape ``(n, n_classes)``

    Recordings for which ``get_audio`` reports failure are skipped.
    """
    if isinstance(save_path, (str, os.PathLike)):
        out_dir = os.path.dirname(os.fspath(save_path))
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    # Collect results in lists and build the arrays once at the end; growing
    # an array with numpy.append re-copies everything on every iteration.
    spectrograms = []
    class_ids = []
    for i, url in tqdm(data_x.items()):
        processed, audio = get_audio(url)
        if processed:
            spectrograms.append(audio)
            class_ids.append(genus_to_id[data_y[i]])

    if spectrograms:
        x_accumulator = numpy.array(spectrograms, dtype=numpy.float64)
        # Row k of the identity matrix is the one-hot vector of class k.
        y_accumulator = numpy.eye(n_classes)[class_ids]
    else:
        x_accumulator = numpy.empty((0, 862, 128))
        y_accumulator = numpy.empty((0, n_classes))
    numpy.savez_compressed(save_path, train=x_accumulator, test=y_accumulator)

In [14]:
# Download and convert the fifth training chunk into audio_npz/train_data_5
# (numpy appends the .npz extension).
# NOTE(review): only chunk 5 is processed in this cell; the other chunks are
# presumably handled by re-running it with different arguments - confirm.
save_npz(
    data_x=X_train_5,
    data_y=y_train_5,
    save_path=os.path.join(npz_path, 'train_data_5'),
)

27it [03:05,  7.40s/it]

Decoding failed. ffmpeg returned error code: 1

Output from ffmpeg/avlib:

b'ffmpeg version git-2020-02-27-9b22254 Copyright (c) 2000-2020 the FFmpeg developers\r\n  built with gcc 9.2.1 (GCC) 20200122\r\n  configuration: --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enabl

752it [52:26,  3.74s/it]

Decoding failed. ffmpeg returned error code: 1

Output from ffmpeg/avlib:

b'ffmpeg version git-2020-02-27-9b22254 Copyright (c) 2000-2020 the FFmpeg developers\r\n  built with gcc 9.2.1 (GCC) 20200122\r\n  configuration: --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enabl

805it [55:47,  5.12s/it]

Decoding failed. ffmpeg returned error code: 1

Output from ffmpeg/avlib:

b'ffmpeg version git-2020-02-27-9b22254 Copyright (c) 2000-2020 the FFmpeg developers\r\n  built with gcc 9.2.1 (GCC) 20200122\r\n  configuration: --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enabl

1059it [1:11:43,  5.00s/it]

Decoding failed. ffmpeg returned error code: 1

Output from ffmpeg/avlib:

b'ffmpeg version git-2020-02-27-9b22254 Copyright (c) 2000-2020 the FFmpeg developers\r\n  built with gcc 9.2.1 (GCC) 20200122\r\n  configuration: --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enabl

1796it [1:59:17,  3.99s/it]
