In [1]:
import pickle
# Feature extraction and data preprocessing
import librosa
import librosa.display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image
from pathlib import Path
import csv
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy import signal
import scipy
#Reports
from sklearn.metrics import classification_report, confusion_matrix

import warnings
warnings.filterwarnings('ignore')

import os
import pydub

In [2]:
# Root directory of the audio recordings; the loop below expects one
# sub-directory per species code underneath it.
# The location can be overridden with the BIRD_AUDIO_DIR environment variable
# so the notebook is not tied to one machine; the original local path is kept
# as the default.
audio_path = Path(os.environ.get(
    'BIRD_AUDIO_DIR',
    '/media/sasanka/Expansion/xeno-canto-bird-recordings-extended-a-m/A-M',
))

In [3]:
# Target sampling rate handed to librosa.load (and to the MFCC computation)
# in the feature-extraction loop, so every file is resampled to the same rate.
sr = 32000

In [4]:
# Load the recording metadata table.
df = pd.read_csv('./train_extended.csv')
# Keep only highly rated recordings (rating > 3.0) that are also short
# (duration < 20; presumably seconds - TODO confirm), so there is less audio
# to process.
# Both conditions are evaluated on `df` and combined into one mask. The
# previous version indexed the already-filtered frame with a mask built from
# the unfiltered `df`, which only worked because pandas silently re-aligned
# the mask (the resulting warning is suppressed at the top of the notebook).
dff = df[(df['rating'] > 3.0) & (df['duration'] < 20)]
print(len(dff))

3954


In [5]:
# Keep only species that still have at least MIN_FILES_PER_SPECIES recordings
# after the rating/duration filter; species with fewer files are dropped.
# (The earlier comment said "more than 10", but the threshold in the code
# has always been 20.)
MIN_FILES_PER_SPECIES = 20
# Number of remaining recordings per species code.
dfc = dff.groupby('ebird_code')['ebird_code'].count()
rare_codes = dfc[dfc < MIN_FILES_PER_SPECIES].index
dff = dff[~dff['ebird_code'].isin(rare_codes)]
print(len(dff))

2967


In [6]:
# Distinct species codes left after filtering, in order of first appearance
# in `dff`; the feature-extraction loop iterates over these.
sound_categories = dff['ebird_code'].unique()

In [7]:
# Show the species codes that will be processed.
sound_categories

array(['amecro', 'amerob', 'barswa', 'bewwre', 'blujay', 'bnhcow',
       'cangoo', 'carwre', 'caster1', 'chispa', 'comrav', 'comred',
       'comter', 'comyel', 'daejun', 'easmea', 'eastow', 'eucdov',
       'eursta', 'gadwal', 'gnwtea', 'greegr', 'grtgra', 'grycat',
       'horlar', 'houfin', 'houspa', 'houwre', 'mallar3', 'marwre',
       'norcar', 'norfli', 'normoc', 'redcro', 'rewbla', 'savspa',
       'sonspa', 'spotow', 'swathr', 'tuftit', 'warvir', 'wesmea',
       'whtspa'], dtype=object)

In [8]:
# Empty frame with one column for the species code and one for the mean MFCC
# vector.
# NOTE(review): nothing in the visible cells writes to or reads from this
# frame (features are collected in the `audios` list instead) - confirm
# whether it is still needed.
mfcc_df = pd.DataFrame([], columns = ['ebird_code', 'mfcc_mean'])

In [10]:
from tqdm.notebook import tqdm

In [11]:
# Extract one feature vector per recording: the 13 MFCC coefficients averaged
# over time. Results are collected as (species code, mean-MFCC array) tuples.
audios = []
# (species code, file name, error) for every file that could not be processed.
# Failures used to be swallowed silently, which made missing or corrupt files
# invisible; they are still skipped, but now recorded and summarised.
failed_files = []
# NOTE(review): Y, Y_classes and label are not used in this cell; they are
# kept because later cells may rely on them - confirm.
Y = []
Y_classes = []
label = 0
category_progress = tqdm(sound_categories)
for category_name in category_progress:
    # Use the public tqdm API so the bar is refreshed with the new description.
    category_progress.set_description(category_name)
    # File names of all selected recordings for this species.
    l_files = dff.loc[dff['ebird_code'] == category_name, 'filename'].values
    tqdm.write("Bird: "+category_name+"  files: "+str(len(l_files)))
    for file_name in l_files:
        try:
            sound_path = audio_path/category_name/file_name
            # Resample to the configured rate; the returned rate is discarded
            # so the notebook-level `sr` is not rebound inside the loop.
            y, _ = librosa.load(sound_path, mono=True, sr=sr)
            # Average each coefficient over all frames of the recording.
            mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T,axis=0)
        except Exception as e:
            # Best effort: skip unreadable files but keep track of them.
            failed_files.append((category_name, file_name, repr(e)))
            continue
        audios.append((category_name,mfcc))

if failed_files:
    tqdm.write("Skipped "+str(len(failed_files))+" file(s) that could not be processed; see failed_files")

  0%|          | 0/43 [00:00<?, ?it/s]

Bird: amecro  files: 33
Bird: amerob  files: 55
Bird: barswa  files: 157
Bird: bewwre  files: 75
Bird: blujay  files: 62
Bird: bnhcow  files: 29
Bird: cangoo  files: 41
Bird: carwre  files: 80
Bird: caster1  files: 20
Bird: chispa  files: 29
Bird: comrav  files: 217
Bird: comred  files: 40
Bird: comter  files: 83
Bird: comyel  files: 53
Bird: daejun  files: 39
Bird: easmea  files: 25
Bird: eastow  files: 27
Bird: eucdov  files: 107
Bird: eursta  files: 137
Bird: gadwal  files: 29
Bird: gnwtea  files: 45
Bird: greegr  files: 43
Bird: grtgra  files: 26
Bird: grycat  files: 35
Bird: horlar  files: 40
Bird: houfin  files: 34
Bird: houspa  files: 190
Bird: houwre  files: 214
Bird: mallar3  files: 163
Bird: marwre  files: 33
Bird: norcar  files: 58
Bird: norfli  files: 27
Bird: normoc  files: 24
Bird: redcro  files: 299
Bird: rewbla  files: 96
Bird: savspa  files: 21
Bird: sonspa  files: 82
Bird: spotow  files: 49
Bird: swathr  files: 33
Bird: tuftit  files: 31
Bird: warvir  files: 27
Bird: 

In [12]:
# Preview only the first few (species code, mean-MFCC) entries; displaying the
# whole list prints thousands of arrays and bloats the notebook.
audios[:5]

[('amecro',
  array([-333.71207  ,  -10.329064 , -185.3892   ,   -6.9985123,
          -55.323956 ,   21.749538 ,   -8.345579 ,   -4.0483046,
           -5.785289 ,   -8.088169 ,    3.3916748,   18.481867 ,
            7.627993 ], dtype=float32)),
 ('amecro',
  array([-310.532    ,   87.413315 ,  -41.90712  ,   -2.010652 ,
          -14.669964 ,   11.598625 ,   -2.1740491,    3.3270574,
           -3.2823744,    0.5799249,   -2.3528104,    9.613209 ,
           -0.9881407], dtype=float32)),
 ('amecro',
  array([-257.20612  ,   30.948402 ,  -92.14503  ,   -4.308688 ,
          -47.76808  ,   -5.709753 ,  -19.964533 ,  -22.101341 ,
          -20.08943  ,   -7.437179 ,   -2.8590574,   -6.633049 ,
           -4.495727 ], dtype=float32)),
 ('amecro',
  array([-409.79108 ,   99.99055 ,  -17.238626,  -23.202827,  -28.408302,
           -9.704721,  -13.144947,  -19.421942,  -21.50984 ,  -20.90471 ,
          -23.464935,  -16.597988,  -26.124681], dtype=float32)),
 ('amecro',
  array([-169.3741

In [32]:
# NOTE(review): .loc slices by index label, so -1 here is a label, not a
# position. The recorded output shows only the column header, which suggests
# no rows are selected. If the intent was "all rows but the last", use
# df.iloc[:-1]; if it was just to list the columns, df.columns is clearer -
# TODO confirm.
df.loc[:-1]

Unnamed: 0,rating,playback_used,ebird_code,channels,date,duration,filename,species,title,secondary_labels,...,background,xc_id,url,country,author,primary_label,longitude,time,recordist,license
