In [17]:
import pickle
import IPython.display as ipd
# feature extractoring and preprocessing data
import librosa
import librosa.display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image
from pathlib import Path
import csv
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy import signal
import scipy
import noisereduce as nr
#Reports
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

import os
import pydub

In [18]:
audio_path = Path('A-M')

In [19]:
e_file = 'A-M/amecro/XC264942.mp3'
ipd.display(ipd.Audio(e_file))
y, sr = librosa.load(e_file)
reduced_noise = nr.reduce_noise(y=y, sr=sr, thresh_n_mult_nonstationary=2,stationary=False)

In [21]:
from scipy import signal
def f_high(y,sr):
    b,a = signal.butter(10, 2000/(sr/2), btype='highpass')
    yf = signal.lfilter(b,a,y)
    return yf

In [23]:
sr = 32000

In [24]:
df = pd.read_csv('./train_extended.csv')
# Selecting high-rated sound only
dff = df[df['rating'] > 3.0]
# Selecting shorter files only, less data to process
dff = dff[df['duration'] < 20]
print(len(dff))

3954


In [25]:
# Selecting birds with more than 10 examples left
dfc = dff.groupby('ebird_code')['ebird_code'].count()
dff = dff[~dff['ebird_code'].isin(dfc[dfc.values < 20].index)]
print(len(dff))

2967


In [26]:
sound_categories = dff['ebird_code'].unique()

In [54]:
sound_categories

array(['amecro', 'amerob', 'barswa', 'bewwre', 'blujay', 'bnhcow',
       'cangoo', 'carwre', 'caster1', 'chispa', 'comrav', 'comred',
       'comter', 'comyel', 'daejun', 'easmea', 'eastow', 'eucdov',
       'eursta', 'gadwal', 'gnwtea', 'greegr', 'grtgra', 'grycat',
       'horlar', 'houfin', 'houspa', 'houwre', 'mallar3', 'marwre',
       'norcar', 'norfli', 'normoc', 'redcro', 'rewbla', 'savspa',
       'sonspa', 'spotow', 'swathr', 'tuftit', 'warvir', 'wesmea',
       'whtspa'], dtype=object)

In [55]:
def reduce_noise(y, sr):
    reduced_noise = nr.reduce_noise(y=y, sr=sr, thresh_n_mult_nonstationary=2,stationary=False)
    return f_high(reduced_noise, sr)

In [49]:
#Creating subfolders
sound_categories
subfolder_names = sound_categories
for subfolder_name in subfolder_names:
    os.makedirs(os.path.join('mel_cleaned_images', subfolder_name))

In [56]:
def mel_spectrogram_images(sound_path, category_name, file_name):
    y, y_sr = librosa.load(sound_path, mono=True)
        
    mel_spect = librosa.feature.melspectrogram(y=y, sr=y_sr, n_fft=2048, hop_length=1024)
    mel_spect = librosa.power_to_db(mel_spect, ref=np.max)
    librosa.display.specshow(mel_spect, fmax=8000);
    #plt.title('Mel Spectrogram');
    #plt.colorbar(format='%+2.0f dB');
    
    
    # As png
    plt.savefig('mel_cleaned_images/'+ category_name + '/' + file_name +'.png')

In [None]:
#category_progress = tqdm(sound_categories)
for category_name in sound_categories:
    #Walk through the dataframe filename values
    l_files = dff[dff['ebird_code'] == category_name]['filename'].values
    print("Bird: "+category_name+"  files: "+str(len(l_files)))
    print(l_files)
    for file_name in l_files:
        try:
            sound_path = audio_path/category_name/file_name
            print(sound_path)
            mel_spectrogram_images(sound_path, category_name, file_name.strip('.mp3'))
        except Exception as e:
            print(e)
            pass

Bird: amecro  files: 33
['XC572379.mp3' 'XC556240.mp3' 'XC550085.mp3' 'XC531008.mp3'
 'XC516376.mp3' 'XC578154.mp3' 'XC563629.mp3' 'XC533366.mp3'
 'XC464018.mp3' 'XC460872.mp3' 'XC454607.mp3' 'XC453754.mp3'
 'XC425503.mp3' 'XC425502.mp3' 'XC394324.mp3' 'XC357303.mp3'
 'XC344949.mp3' 'XC327683.mp3' 'XC306320.mp3' 'XC304860.mp3'
 'XC291509.mp3' 'XC291508.mp3' 'XC287852.mp3' 'XC287325.mp3'
 'XC264942.mp3' 'XC253628.mp3' 'XC205241.mp3' 'XC205240.mp3'
 'XC193114.mp3' 'XC152674.mp3' 'XC149940.mp3' 'XC144670.mp3' 'XC58475.mp3']
A-M\amecro\XC572379.mp3
A-M\amecro\XC556240.mp3
Input signal length=0 is too small to resample from 48000->22050
A-M\amecro\XC550085.mp3
A-M\amecro\XC531008.mp3
A-M\amecro\XC516376.mp3
A-M\amecro\XC578154.mp3
Input signal length=0 is too small to resample from 48000->22050
A-M\amecro\XC563629.mp3
A-M\amecro\XC533366.mp3
A-M\amecro\XC464018.mp3
A-M\amecro\XC460872.mp3
A-M\amecro\XC454607.mp3
A-M\amecro\XC453754.mp3
Input signal length=0 is too small to resample from 480

A-M\barswa\XC584488.mp3
A-M\barswa\XC583046.mp3
A-M\barswa\XC582626.mp3
A-M\barswa\XC579896.mp3
A-M\barswa\XC578054.mp3
A-M\barswa\XC577626.mp3
A-M\barswa\XC575358.mp3
A-M\barswa\XC574535.mp3
A-M\barswa\XC567605.mp3
A-M\barswa\XC560260.mp3
A-M\barswa\XC555397.mp3
A-M\barswa\XC553565.mp3
A-M\barswa\XC541443.mp3
A-M\barswa\XC492313.mp3
A-M\barswa\XC492312.mp3
A-M\barswa\XC492284.mp3
A-M\barswa\XC486209.mp3
A-M\barswa\XC485999.mp3
A-M\barswa\XC484385.mp3
A-M\barswa\XC469948.mp3
A-M\barswa\XC468466.mp3
A-M\barswa\XC421383.mp3
Input signal length=0 is too small to resample from 44100->22050
A-M\barswa\XC339821.mp3
A-M\barswa\XC277319.mp3
Input signal length=0 is too small to resample from 48000->22050
A-M\barswa\XC83449.mp3
A-M\barswa\XC585553.mp3
A-M\barswa\XC585273.mp3
A-M\barswa\XC584461.mp3
A-M\barswa\XC584460.mp3
A-M\barswa\XC584459.mp3
A-M\barswa\XC584458.mp3
A-M\barswa\XC582362.mp3
A-M\barswa\XC581794.mp3
A-M\barswa\XC581793.mp3
A-M\barswa\XC581792.mp3
A-M\barswa\XC581791.mp3
A-M\bar

A-M\bewwre\XC501817.mp3
A-M\bewwre\XC490420.mp3
A-M\bewwre\XC456896.mp3
Input signal length=0 is too small to resample from 48000->22050
A-M\bewwre\XC448292.mp3
A-M\bewwre\XC435718.mp3
A-M\bewwre\XC407684.mp3
A-M\bewwre\XC407681.mp3
A-M\bewwre\XC407680.mp3
A-M\bewwre\XC405733.mp3
Input signal length=0 is too small to resample from 44100->22050
A-M\bewwre\XC402950.mp3
A-M\bewwre\XC402801.mp3
Input signal length=0 is too small to resample from 44100->22050
A-M\bewwre\XC389536.mp3
Input signal length=0 is too small to resample from 44100->22050
A-M\bewwre\XC384899.mp3
Input signal length=0 is too small to resample from 44100->22050
A-M\bewwre\XC382034.mp3
A-M\bewwre\XC367996.mp3
A-M\bewwre\XC364702.mp3
A-M\bewwre\XC360348.mp3
A-M\bewwre\XC352808.mp3
Input signal length=0 is too small to resample from 44100->22050
A-M\bewwre\XC319939.mp3
Input signal length=0 is too small to resample from 44100->22050
A-M\bewwre\XC314925.mp3
Input signal length=0 is too small to resample from 44100->22050


A-M\bnhcow\XC570948.mp3
A-M\bnhcow\XC569613.mp3
A-M\bnhcow\XC566847.mp3
A-M\bnhcow\XC566843.mp3
A-M\bnhcow\XC564657.mp3
A-M\bnhcow\XC564520.mp3
A-M\bnhcow\XC558120.mp3
A-M\bnhcow\XC550246.mp3
A-M\bnhcow\XC574029.mp3
A-M\bnhcow\XC572462.mp3
A-M\bnhcow\XC547281.mp3
Input signal length=0 is too small to resample from 44100->22050
A-M\bnhcow\XC547208.mp3
A-M\bnhcow\XC541099.mp3
A-M\bnhcow\XC511208.mp3
Input signal length=0 is too small to resample from 48000->22050
A-M\bnhcow\XC502510.mp3
Input signal length=0 is too small to resample from 48000->22050
A-M\bnhcow\XC502508.mp3
Input signal length=0 is too small to resample from 48000->22050
A-M\bnhcow\XC502504.mp3
Input signal length=0 is too small to resample from 48000->22050
A-M\bnhcow\XC473444.mp3
A-M\bnhcow\XC466324.mp3
Input signal length=0 is too small to resample from 48000->22050
A-M\bnhcow\XC440572.mp3
Input signal length=0 is too small to resample from 44100->22050
A-M\bnhcow\XC440571.mp3
Input signal length=0 is too small to res