In [4]:
import pickle
import IPython.display as ipd
# feature extractoring and preprocessing data
import librosa
import librosa.display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image
from pathlib import Path
import csv
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy import signal
import scipy
import noisereduce as nr
#Reports
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

import pickle
import os
import pydub

## Preprocessing

In [None]:
audio_path = Path('/media/sasanka/Expansion/xeno-canto-bird-recordings-extended-a-m/A-M')

In [None]:
e_file = '/media/sasanka/Expansion/xeno-canto-bird-recordings-extended-a-m/A-M/amecro/XC264942.mp3'
ipd.display(ipd.Audio(e_file))
y, sr = librosa.load(e_file)
reduced_noise = nr.reduce_noise(y=y, sr=sr, thresh_n_mult_nonstationary=2,stationary=False)

In [None]:
ipd.Audio(data=reduced_noise, rate=sr)

In [None]:
from scipy import signal
def f_high(y,sr):
    b,a = signal.butter(10, 2000/(sr/2), btype='highpass')
    yf = signal.lfilter(b,a,y)
    return yf

In [None]:
ipd.Audio(data=f_high(reduced_noise, sr), rate=sr)

In [None]:
plt.figure(figsize=(14, 5))
librosa.display.waveshow(y, sr=sr)
librosa.display.waveshow(f_high(y, sr), sr=sr)
librosa.display.waveshow(reduced_noise, sr=sr)
librosa.display.waveshow(f_high(reduced_noise, sr), sr=sr)

In [None]:
sr = 32000

In [None]:
df = pd.read_csv('./train_extended.csv')
# Selecting high-rated sound only
dff = df[df['rating'] > 3.0]
# Selecting shorter files only, less data to process
dff = dff[df['duration'] < 20]
print(len(dff))

In [None]:
# Selecting birds with more than 10 examples left
dfc = dff.groupby('ebird_code')['ebird_code'].count()
dff = dff[~dff['ebird_code'].isin(dfc[dfc.values < 20].index)]
print(len(dff))

In [None]:
sound_categories = dff['ebird_code'].unique()

In [None]:
sound_categories

In [None]:
mfcc_df = pd.DataFrame([], columns = ['ebird_code', 'mfcc_mean'])

In [None]:
from tqdm.notebook import tqdm

In [None]:
# header = 'filename label chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
# for i in range(1, 21):
#     header += f' mfcc{i}'
# header += ' de_chroma_stft de_rmse de_spectral_centroid de_spectral_bandwidth de_rolloff de_zero_crossing_rate'
# for i in range(1, 21):
#     header += f' de_mfcc{i}'
# header = header.split()
# file = open('data.csv', 'w', newline='')
# writer = csv.writer(file)
# writer.writerow(header)
# file.close()

In [None]:
def reduce_noise(y, sr):
    reduced_noise = nr.reduce_noise(y=y, sr=sr, thresh_n_mult_nonstationary=2,stationary=False)
    return f_high(reduced_noise, sr)

In [None]:
def feature_extractor(sound_path, category_name, file_name):
    y, y_sr = librosa.load(sound_path, mono=True)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=y_sr)
    rmse = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    to_append = f'{file_name} {category_name} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
    for e in mfcc:
        to_append += f' {np.mean(e)}'
    y_denoise = reduce_noise(y, y_sr)
    chroma_stft = librosa.feature.chroma_stft(y=y_denoise, sr=y_sr)
    rmse = librosa.feature.rms(y=y_denoise)
    spec_cent = librosa.feature.spectral_centroid(y=y_denoise, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y_denoise, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y_denoise, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y_denoise)
    mfcc = librosa.feature.mfcc(y=y_denoise, sr=sr)
    to_append += f' {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
    for e in mfcc:
        to_append += f' {np.mean(e)}'
    # print(to_append)
    file = open('data.csv', 'a', newline='')
    writer = csv.writer(file)
    writer.writerow(to_append.split())
    file.close()

In [None]:
# category_progress = tqdm(sound_categories)
# for category_name in category_progress:
#     category_progress.desc = category_name
#     #Walk through the dataframe filename values
#     l_files = dff[dff['ebird_code'] == category_name]['filename'].values
#     tqdm.write("Bird: "+category_name+"  files: "+str(len(l_files)))
#     for file_name in tqdm(l_files, desc='Files'):
#         try:
#             sound_path = audio_path/category_name/file_name
#             feature_extractor(sound_path, category_name, file_name)
#         except Exception as e:
#             print(e)
#             pass

## Models

In [31]:
data = pd.read_csv('./nu_data.csv')
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)
data.head()

Unnamed: 0,label,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,de_mfcc11,de_mfcc12,de_mfcc13,de_mfcc14,de_mfcc15,de_mfcc16,de_mfcc17,de_mfcc18,de_mfcc19,de_mfcc20
0,amecro,0.338716,0.034224,3616.920896,1966.074947,5831.529003,0.284085,-300.704651,-73.681175,-152.796417,...,-2.649461,3.687365,-3.583898,-1.690314,-2.57984,5.844545,-0.570906,1.842792,-6.125224,2.682204
1,amecro,0.606191,0.029051,2733.326161,2403.171895,5491.761647,0.171714,-289.052277,69.980736,-41.882755,...,-0.980518,1.679347,0.567278,1.058613,-5.341579,2.657883,0.366914,0.601444,-0.815928,1.95911
2,amecro,0.402761,0.076397,3645.575802,2173.824225,6076.584359,0.294855,-234.259277,-1.378142,-78.33036,...,-6.909959,3.638154,-0.954869,-1.682335,1.721543,0.476318,-1.213779,0.3033,-0.214653,1.033576
3,amecro,0.514074,0.021601,1988.833265,2266.835906,4205.494756,0.087,-392.110748,92.414574,-31.003239,...,3.098013,-3.602407,3.895024,-3.446658,-0.231466,0.64092,0.918258,1.069209,-2.228328,0.905272
4,amecro,0.58942,0.056681,2002.241988,2366.313228,4432.136133,0.095752,-152.192566,119.386864,-4.404914,...,3.06926,2.077627,-3.567248,-1.038863,-0.406278,2.802787,-1.607536,2.183332,-2.002364,-0.568432


In [69]:
bird_list = data.iloc[:, :1]
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(bird_list)

In [70]:
data.insert(1, 'encoded_label', encoded_labels)

ValueError: cannot insert encoded_label, already exists

In [33]:
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(np.array(data.iloc[:, 2:], dtype = float)), columns = data.columns[2:])

In [34]:
y = data['label']

In [36]:
model_list = ['Logisitic Regression', 'SVM', 'KNN', 'Decision Tree', 'Random Forest', 'Naive Bayes']

In [73]:
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

In [74]:
def run_pipeline(data, name=None):
    pipe_encoder = LabelEncoder()
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(np.array(data.iloc[:, 2:], dtype = float)), columns = data.columns[2:])
    y = data['label']
    encoded_labels = pipe_encoder.fit_transform(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=40)
    model_pipeline = []
    model_pipeline.append(SGDClassifier())
    model_pipeline.append(SVC(random_state=30))
    model_pipeline.append(KNeighborsClassifier(n_neighbors=30))
    model_pipeline.append(DecisionTreeClassifier(random_state=30))
    model_pipeline.append(RandomForestClassifier(random_state=30))
    model_pipeline.append(GaussianNB())
    # model_pipeline.append(xgb.XGBClassifier(use_label_encoder=False, objective='multi:softmax'))
    # model_list = ['Logisitic Regression', 'SVM', 'KNN', 'Decision Tree', 'Random Forest', 'Naive Bayes']
    classes = list(pipe_encoder.classes_)
    classes.append('macro')
    f1_scores = []
    cm_list = []

    for model in model_pipeline:
        model.fit(X_train, y_train)
        if name:
            pickle.dump(model, file = open('./models/'+name+'_'+type(model).__name__+'.pkl', 'wb'))
        test_class_pred = model.predict(X_test)
        cm_list.append(confusion_matrix(y_test, test_class_pred))
        f1 = f1_score(y_test, test_class_pred, average=None)
        f1 = f1.tolist()
        f1.append(f1_score(y_test, test_class_pred, average='macro'))
        f1_scores.append(f1)
    # print(len(f1_scores[0]))
    # print(len(classes))
    # f1_result_df = pd.DataFrame({'Classes': classes, 'KNN': f1_scores[0]})
    # f1_result_df = pd.DataFrame({'Classes': classes, 'SVM': f1_scores[0], 'KNN': f1_scores[1], 'Decision Tree': f1_scores[2], 'Random Forest': f1_scores[3], 'Naive Bayes': f1_scores[4]})
    f1_result_df = pd.DataFrame({'Classes': classes, 'SGD': f1_scores[0], 'SVM': f1_scores[1], 'KNN': f1_scores[2], 'Decision Tree': f1_scores[3], 'Random Forest': f1_scores[4], 'Naive Bayes': f1_scores[5]})
    return f1_result_df, cm_list, model_pipeline

In [75]:
f1, cm, trained_models = run_pipeline(data, name = '30_class')
f1

Unnamed: 0,Classes,SGD,SVM,KNN,Decision Tree,Random Forest,Naive Bayes
0,amecro,0.181818,0.0,0.0,0.2,0.0,0.0
1,amerob,0.0,0.0,0.0,0.0,0.235294,0.0
2,barswa,0.346667,0.271186,0.298507,0.172414,0.385965,0.054054
3,bewwre,0.0,0.0,0.0,0.102564,0.2,0.0
4,blujay,0.125,0.470588,0.125,0.0,0.133333,0.235294
5,bnhcow,0.181818,0.0,0.0,0.0,0.0,0.173913
6,cangoo,0.166667,0.666667,0.0,0.142857,0.2,0.47619
7,carwre,0.142857,0.258065,0.111111,0.162162,0.230769,0.451613
8,caster1,0.0,0.0,0.0,0.0,0.0,0.129032
9,chispa,0.2,0.0,0.0,0.166667,0.25,0.285714


In [76]:
class_5_birds = []
for i in [2, 10, 17, 27, 28]:
    class_5_birds.append(encoder.classes_[i])

In [77]:
class_5_data = data[data['label'].isin(class_5_birds)]
f1_5, cm_5, trained_models_5 = run_pipeline(class_5_data, name = '5_class')
f1_5

Unnamed: 0,Classes,SGD,SVM,KNN,Decision Tree,Random Forest,Naive Bayes
0,barswa,0.528302,0.464286,0.385965,0.393939,0.407407,0.412698
1,comrav,0.8,0.787234,0.730769,0.53012,0.725275,0.613636
2,eucdov,0.816327,0.769231,0.25,0.47619,0.514286,0.408163
3,houwre,0.733333,0.666667,0.62069,0.487805,0.680412,0.60241
4,mallar3,0.80597,0.745763,0.611111,0.422535,0.656716,0.622951
5,macro,0.736786,0.686636,0.519707,0.462118,0.596819,0.531972


In [78]:
class_3_birds = []
for i in [10, 17, 28]:
    class_3_birds.append(encoder.classes_[i])

In [79]:
class_3_data = data[data['label'].isin(class_3_birds)]
f1_3, cm_3, trained_models_3 = run_pipeline(class_3_data, name = '3_class')
f1_3

Unnamed: 0,Classes,SGD,SVM,KNN,Decision Tree,Random Forest,Naive Bayes
0,comrav,0.894118,0.893617,0.829787,0.741573,0.831461,0.827586
1,eucdov,0.780488,0.777778,0.625,0.611111,0.702703,0.714286
2,mallar3,0.914286,0.939394,0.828571,0.732394,0.885714,0.895522
3,macro,0.862964,0.870263,0.76112,0.695026,0.806626,0.812465


In [164]:
from sklearn.feature_selection import RFE, RFECV
# from sklearn.svm import SVR
estimator = SGDClassifier()
selector = RFECV(estimator, n_jobs = -1)
selector = selector.fit(X_train, y_train)

In [165]:
selector.get_feature_names_out()

array(['chroma_stft', 'spectral_centroid', 'spectral_bandwidth',
       'rolloff', 'zero_crossing_rate', 'mfcc2', 'mfcc3', 'mfcc4',
       'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11',
       'mfcc13', 'mfcc17', 'mfcc18', 'mfcc19', 'mfcc20', 'de_chroma_stft',
       'de_rmse', 'de_spectral_centroid', 'de_spectral_bandwidth',
       'de_rolloff', 'de_zero_crossing_rate', 'de_mfcc1', 'de_mfcc2',
       'de_mfcc3', 'de_mfcc4', 'de_mfcc5', 'de_mfcc6', 'de_mfcc7',
       'de_mfcc9', 'de_mfcc10', 'de_mfcc11', 'de_mfcc12', 'de_mfcc13',
       'de_mfcc14', 'de_mfcc16', 'de_mfcc17', 'de_mfcc18', 'de_mfcc19'],
      dtype=object)

In [166]:
X_train[selector.get_feature_names_out()]

Unnamed: 0,chroma_stft,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,...,de_mfcc9,de_mfcc10,de_mfcc11,de_mfcc12,de_mfcc13,de_mfcc14,de_mfcc16,de_mfcc17,de_mfcc18,de_mfcc19
322,0.595127,-0.295479,-0.311428,-0.361810,-0.184530,0.288494,-0.374395,0.671673,0.310303,0.869519,...,-0.563342,0.623638,1.310458,-0.740947,-0.390689,0.077581,-1.201688,0.527205,0.431307,0.401945
524,-0.854356,1.817702,0.735311,1.730515,2.479942,-2.171474,-2.092589,-1.229563,-0.092882,-0.046377,...,-0.048013,0.517805,-0.734811,0.906251,-1.015502,1.279080,0.932624,-0.591873,-0.136048,1.096926
728,1.937576,0.926745,2.177325,1.721051,0.625610,-0.116306,0.706345,-0.148850,1.014448,0.312180,...,0.354948,-0.585320,0.179063,0.013971,-0.268978,0.236937,-0.581100,-0.088994,0.395501,0.209490
207,-0.091950,-0.753707,-0.403186,-0.664423,-0.447876,0.918183,0.005160,-0.470569,-0.349284,-0.416541,...,0.380430,-1.256240,0.191654,0.212108,-0.595392,0.961075,0.335370,0.438638,-0.902362,-0.736267
409,0.053438,-0.731057,-0.194218,-0.498729,-1.014543,0.118654,0.114892,1.518415,1.346073,0.774799,...,0.003820,-0.649095,0.440383,0.404366,-0.135728,-1.039540,-0.303502,0.876533,-0.982953,-0.148871
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723,0.210131,-0.784457,-0.374761,-0.826747,-0.670707,0.841581,-0.131375,-0.670811,0.670653,0.580504,...,0.569360,0.249483,-0.264117,-0.452750,0.365122,-0.202938,-0.227142,0.104479,-0.126735,0.349498
0,1.280482,-0.893731,0.672757,-0.607957,-1.230110,0.509437,1.445612,0.436388,1.071443,0.966449,...,-0.961306,0.874091,-0.268693,0.390672,-0.351374,0.285632,0.436835,-0.054087,0.567147,-1.041207
76,0.208218,0.321244,0.151624,0.253622,0.356255,-0.439341,-1.218578,-1.578918,-0.459401,-1.108878,...,0.767474,-1.155218,-0.008652,0.531650,0.177059,-0.786569,-0.547933,0.807537,-0.777962,0.341392
193,0.163238,-0.458285,0.445173,0.077609,-0.674755,0.492893,0.449995,-0.035596,-0.109487,-0.172924,...,-1.093044,-0.469551,2.058543,-1.052332,-0.420382,0.498067,-0.802943,2.036061,-2.229737,1.502486


In [167]:
clf = SGDClassifier()
clf.fit(X_train[selector.get_feature_names_out()], y_train)

In [168]:
y_pred = clf.predict(X_test[selector.get_feature_names_out()])
print("Total samples: "+ str(len(X_test)))

print('Test accuracy:', len(y_pred[y_pred==y_test])/len(y_pred))

# print('Confusion Matrix')
# print(confusion_matrix(y_test, y_pred))
print('Classification Report')
f = open('dup_rfe.txt', 'w')
f.write(classification_report(y_test, y_pred))
f.close()
# print(classification_report(y_test, y_pred))

Total samples: 172
Test accuracy: 0.7558139534883721
Classification Report
