In [81]:
import pickle
import IPython.display as ipd
# feature extractoring and preprocessing data
import librosa
import librosa.display
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pathlib import Path
import csv
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy import signal
import noisereduce as nr
#Reports
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

import pickle

## Preprocessing
Do not run this block unless you have new data

In [None]:
audio_path = Path('/media/sasanka/Expansion/xeno-canto-bird-recordings-extended-a-m/A-M')

In [None]:
e_file = '/media/sasanka/Expansion/xeno-canto-bird-recordings-extended-a-m/A-M/amecro/XC264942.mp3'
ipd.display(ipd.Audio(e_file))
y, sr = librosa.load(e_file)
reduced_noise = nr.reduce_noise(y=y, sr=sr, thresh_n_mult_nonstationary=2,stationary=False)

In [None]:
ipd.Audio(data=reduced_noise, rate=sr)

In [None]:
from scipy import signal
def f_high(y,sr):
    b,a = signal.butter(10, 2000/(sr/2), btype='highpass')
    yf = signal.lfilter(b,a,y)
    return yf

In [None]:
ipd.Audio(data=f_high(reduced_noise, sr), rate=sr)

In [None]:
plt.figure(figsize=(14, 5))
librosa.display.waveshow(y, sr=sr)
librosa.display.waveshow(f_high(y, sr), sr=sr)
librosa.display.waveshow(reduced_noise, sr=sr)
librosa.display.waveshow(f_high(reduced_noise, sr), sr=sr)

In [None]:
sr = 32000

In [None]:
df = pd.read_csv('./train_extended.csv')
# Selecting high-rated sound only
dff = df[df['rating'] > 3.0]
# Selecting shorter files only, less data to process
dff = dff[df['duration'] < 20]
print(len(dff))

In [None]:
# Selecting birds with more than 10 examples left
dfc = dff.groupby('ebird_code')['ebird_code'].count()
dff = dff[~dff['ebird_code'].isin(dfc[dfc.values < 20].index)]
print(len(dff))

In [None]:
# header = 'filename label chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
# for i in range(1, 21):
#     header += f' mfcc{i}'
# header += ' de_chroma_stft de_rmse de_spectral_centroid de_spectral_bandwidth de_rolloff de_zero_crossing_rate'
# for i in range(1, 21):
#     header += f' de_mfcc{i}'
# header = header.split()
# file = open('data.csv', 'w', newline='')
# writer = csv.writer(file)
# writer.writerow(header)
# file.close()

In [None]:
def reduce_noise(y, sr):
    reduced_noise = nr.reduce_noise(y=y, sr=sr, thresh_n_mult_nonstationary=2,stationary=False)
    return f_high(reduced_noise, sr)

In [None]:
def feature_extractor(sound_path, category_name, file_name):
    y, y_sr = librosa.load(sound_path, mono=True)
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=y_sr)
    rmse = librosa.feature.rms(y=y)
    spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    to_append = f'{file_name} {category_name} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
    for e in mfcc:
        to_append += f' {np.mean(e)}'
    y_denoise = reduce_noise(y, y_sr)
    chroma_stft = librosa.feature.chroma_stft(y=y_denoise, sr=y_sr)
    rmse = librosa.feature.rms(y=y_denoise)
    spec_cent = librosa.feature.spectral_centroid(y=y_denoise, sr=sr)
    spec_bw = librosa.feature.spectral_bandwidth(y=y_denoise, sr=sr)
    rolloff = librosa.feature.spectral_rolloff(y=y_denoise, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y_denoise)
    mfcc = librosa.feature.mfcc(y=y_denoise, sr=sr)
    to_append += f' {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
    for e in mfcc:
        to_append += f' {np.mean(e)}'
    # print(to_append)
    file = open('data.csv', 'a', newline='')
    writer = csv.writer(file)
    writer.writerow(to_append.split())
    file.close()

In [None]:
sound_categories = dff['ebird_code'].unique()

In [None]:
# category_progress = tqdm(sound_categories)
# for category_name in category_progress:
#     category_progress.desc = category_name
#     #Walk through the dataframe filename values
#     l_files = dff[dff['ebird_code'] == category_name]['filename'].values
#     tqdm.write("Bird: "+category_name+"  files: "+str(len(l_files)))
#     for file_name in tqdm(l_files, desc='Files'):
#         try:
#             sound_path = audio_path/category_name/file_name
#             feature_extractor(sound_path, category_name, file_name)
#         except Exception as e:
#             print(e)
#             pass

## Models

In [31]:
data = pd.read_csv('./nu_data.csv')
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)
data.head()

Unnamed: 0,label,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,de_mfcc11,de_mfcc12,de_mfcc13,de_mfcc14,de_mfcc15,de_mfcc16,de_mfcc17,de_mfcc18,de_mfcc19,de_mfcc20
0,amecro,0.338716,0.034224,3616.920896,1966.074947,5831.529003,0.284085,-300.704651,-73.681175,-152.796417,...,-2.649461,3.687365,-3.583898,-1.690314,-2.57984,5.844545,-0.570906,1.842792,-6.125224,2.682204
1,amecro,0.606191,0.029051,2733.326161,2403.171895,5491.761647,0.171714,-289.052277,69.980736,-41.882755,...,-0.980518,1.679347,0.567278,1.058613,-5.341579,2.657883,0.366914,0.601444,-0.815928,1.95911
2,amecro,0.402761,0.076397,3645.575802,2173.824225,6076.584359,0.294855,-234.259277,-1.378142,-78.33036,...,-6.909959,3.638154,-0.954869,-1.682335,1.721543,0.476318,-1.213779,0.3033,-0.214653,1.033576
3,amecro,0.514074,0.021601,1988.833265,2266.835906,4205.494756,0.087,-392.110748,92.414574,-31.003239,...,3.098013,-3.602407,3.895024,-3.446658,-0.231466,0.64092,0.918258,1.069209,-2.228328,0.905272
4,amecro,0.58942,0.056681,2002.241988,2366.313228,4432.136133,0.095752,-152.192566,119.386864,-4.404914,...,3.06926,2.077627,-3.567248,-1.038863,-0.406278,2.802787,-1.607536,2.183332,-2.002364,-0.568432


In [69]:
bird_list = data.iloc[:, :1]
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(bird_list)

In [248]:
try:
    data.insert(1, 'encoded_label', encoded_labels)
except:
    pass

In [33]:
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(np.array(data.iloc[:, 2:], dtype = float)), columns = data.columns[2:])

In [34]:
y = data['label']

In [36]:
model_list = ['Logisitic Regression', 'SVM', 'KNN', 'Decision Tree', 'Random Forest', 'Naive Bayes']

In [73]:
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

In [82]:
def run_pipeline(data, name=None):
    pipe_encoder = LabelEncoder()
    scaler = StandardScaler()
    X = pd.DataFrame(scaler.fit_transform(np.array(data.iloc[:, 2:], dtype = float)), columns = data.columns[2:])
    y = data['label']
    encoded_labels = pipe_encoder.fit_transform(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=40)
    model_pipeline = []
    model_pipeline.append(SGDClassifier())
    model_pipeline.append(SVC(random_state=30))
    model_pipeline.append(KNeighborsClassifier(n_neighbors=30))
    model_pipeline.append(DecisionTreeClassifier(random_state=30))
    model_pipeline.append(RandomForestClassifier(random_state=30))
    model_pipeline.append(GaussianNB())
    # model_pipeline.append(xgb.XGBClassifier(use_label_encoder=False, objective='multi:softmax'))
    # model_list = ['Logisitic Regression', 'SVM', 'KNN', 'Decision Tree', 'Random Forest', 'Naive Bayes']
    classes = list(pipe_encoder.classes_)
    classes.append('macro')
    classes.append('accuracy')
    f1_scores = []
    cm_list = []

    for model in model_pipeline:
        model.fit(X_train, y_train)
        if name:
            pickle.dump(model, file = open('./models/'+name+'_'+type(model).__name__+'.pkl', 'wb'))
        test_class_pred = model.predict(X_test)
        cm_list.append(confusion_matrix(y_test, test_class_pred))
        f1 = f1_score(y_test, test_class_pred, average=None)
        f1 = f1.tolist()
        f1.append(f1_score(y_test, test_class_pred, average='macro'))
        f1.append(accuracy_score(y_test, test_class_pred))
        f1_scores.append(f1)
    # print(len(f1_scores[0]))
    # print(len(classes))
    # f1_result_df = pd.DataFrame({'Classes': classes, 'KNN': f1_scores[0]})
    # f1_result_df = pd.DataFrame({'Classes': classes, 'SVM': f1_scores[0], 'KNN': f1_scores[1], 'Decision Tree': f1_scores[2], 'Random Forest': f1_scores[3], 'Naive Bayes': f1_scores[4]})
    f1_result_df = pd.DataFrame({'Classes': classes, 'SGD': f1_scores[0], 'SVM': f1_scores[1], 'KNN': f1_scores[2], 'Decision Tree': f1_scores[3], 'Random Forest': f1_scores[4], 'Naive Bayes': f1_scores[5]})
    return f1_result_df, cm_list, model_pipeline

In [89]:
f1, cm, trained_models = run_pipeline(data, name = '30_class')
f1

Unnamed: 0,Classes,SGD,SVM,KNN,Decision Tree,Random Forest,Naive Bayes
0,amecro,0.181818,0.0,0.0,0.2,0.0,0.0
1,amerob,0.133333,0.0,0.0,0.0,0.235294,0.0
2,barswa,0.177215,0.271186,0.298507,0.172414,0.385965,0.054054
3,bewwre,0.258065,0.0,0.0,0.102564,0.2,0.0
4,blujay,0.275862,0.470588,0.125,0.0,0.133333,0.235294
5,bnhcow,0.2,0.0,0.0,0.0,0.0,0.173913
6,cangoo,0.181818,0.666667,0.0,0.142857,0.2,0.47619
7,carwre,0.275862,0.258065,0.111111,0.162162,0.230769,0.451613
8,caster1,0.0,0.0,0.0,0.0,0.0,0.129032
9,chispa,0.333333,0.0,0.0,0.166667,0.25,0.285714


In [76]:
class_5_birds = []
for i in [2, 10, 17, 27, 28]:
    class_5_birds.append(encoder.classes_[i])

In [249]:
class_5_birds

['barswa', 'comrav', 'eucdov', 'houwre', 'mallar3']

In [84]:
class_5_data = data[data['label'].isin(class_5_birds)]
f1_5, cm_5, trained_models_5 = run_pipeline(class_5_data, name = '5_class')
f1_5

Unnamed: 0,Classes,SGD,SVM,KNN,Decision Tree,Random Forest,Naive Bayes
0,barswa,0.561404,0.464286,0.385965,0.393939,0.407407,0.412698
1,comrav,0.857143,0.787234,0.730769,0.53012,0.725275,0.613636
2,eucdov,0.653846,0.769231,0.25,0.47619,0.514286,0.408163
3,houwre,0.650602,0.666667,0.62069,0.487805,0.680412,0.60241
4,mallar3,0.794118,0.745763,0.611111,0.422535,0.656716,0.622951
5,macro,0.703423,0.686636,0.519707,0.462118,0.596819,0.531972
6,accuracy,0.715116,0.69186,0.587209,0.465116,0.627907,0.546512


In [78]:
class_3_birds = []
for i in [10, 17, 28]:
    class_3_birds.append(encoder.classes_[i])

In [88]:
class_3_data = data[data['label'].isin(class_3_birds)]
f1_3, cm_3, trained_models_3 = run_pipeline(class_3_data, name = '3_class')
f1_3

Unnamed: 0,Classes,SGD,SVM,KNN,Decision Tree,Random Forest,Naive Bayes
0,comrav,0.909091,0.893617,0.829787,0.741573,0.831461,0.827586
1,eucdov,0.769231,0.777778,0.625,0.611111,0.702703,0.714286
2,mallar3,0.927536,0.939394,0.828571,0.732394,0.885714,0.895522
3,macro,0.868619,0.870263,0.76112,0.695026,0.806626,0.812465
4,accuracy,0.887755,0.887755,0.795918,0.714286,0.826531,0.826531


In [267]:
data[data['label'].isin(class_3_birds)].iloc[:, :28]

Unnamed: 0,label,encoded_label,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,...,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20
581,comrav,10,0.627651,0.001298,3302.967143,2736.900510,6780.609679,0.205043,-512.918823,34.620213,...,-25.326626,-8.201337,-15.937077,-0.443180,-9.238814,2.798203,-6.952209,6.245645,-2.295872,9.764858
582,comrav,10,0.487381,0.027975,1917.636564,1846.729795,3467.885237,0.109244,-318.595367,118.153824,...,-19.218271,-5.569395,-14.937907,0.583224,-11.869913,-0.691014,-4.209258,0.541456,-9.214190,2.956792
583,comrav,10,0.622243,0.003961,2395.517470,2689.891963,5753.292965,0.118364,-401.572540,103.578133,...,-8.240233,5.431741,-6.135738,6.051069,-3.205715,5.349742,-5.506372,4.163332,-5.560931,3.637590
584,comrav,10,0.476066,0.005085,2557.975052,2169.942780,4738.035355,0.168235,-438.514160,82.547783,...,-13.828033,-3.596922,-14.923768,2.733280,-10.093007,-2.573465,-6.562469,-0.627527,-12.561377,-5.648041
585,comrav,10,0.443370,0.003797,2702.147313,2158.704007,4806.089051,0.199580,-473.784973,73.758034,...,-10.117094,4.349098,-7.766016,3.406303,-4.034809,2.534597,-0.725459,9.279177,-4.334186,-1.121109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2123,mallar3,28,0.539939,0.044356,2350.413958,2116.098725,4440.527694,0.139469,-198.905487,87.775589,...,-19.571749,-1.947272,-14.801790,-2.411830,-12.462662,-2.431323,-9.217505,0.904435,-8.287886,2.954143
2124,mallar3,28,0.506421,0.074453,2480.965921,2250.248517,4937.359784,0.148174,-140.160980,87.439133,...,-12.254596,-0.971505,-12.885508,-0.097931,-10.450787,4.611171,-6.059248,2.707269,-8.435923,4.751616
2125,mallar3,28,0.487292,0.082597,3025.620506,2071.373747,5290.266301,0.215188,-143.398514,46.996819,...,-17.286457,-2.457847,-15.542914,-3.705883,-16.116930,-0.389557,-7.744009,0.505532,-9.280234,4.603691
2126,mallar3,28,0.403528,0.046314,3988.376046,1866.075979,5798.808933,0.318541,-383.660492,-155.489059,...,2.617560,-9.126843,-16.766481,6.297186,9.150138,18.077581,-12.578211,-3.993511,-7.643536,16.560572


In [268]:
with_noise_3_class = data[data['label'].isin(class_3_birds)].iloc[:, :28]
with_noise_f1_3, with_noise_cm_3, with_noise_trained_models_3 = run_pipeline(with_noise_3_class, name = 'with_noise_3_class')
with_noise_f1_3

Unnamed: 0,Classes,SGD,SVM,KNN,Decision Tree,Random Forest,Naive Bayes
0,comrav,0.813953,0.851064,0.788462,0.682353,0.8125,0.619048
1,eucdov,0.789474,0.756757,0.666667,0.564103,0.702703,0.553191
2,mallar3,0.888889,0.892308,0.745763,0.75,0.857143,0.769231
3,macro,0.830772,0.833376,0.73363,0.665485,0.790782,0.647157
4,accuracy,0.836735,0.846939,0.755102,0.683673,0.806122,0.653061


## RFE

### 3_classes on SGD

In [232]:
encoder = LabelEncoder()
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(np.array(class_3_data.iloc[:, 2:], dtype = float)), columns = class_3_data.columns[2:])
y = class_3_data['label']
encoded_labels = encoder.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=40)
from sklearn.feature_selection import RFE, RFECV
# from sklearn.svm import SVR
estimator = trained_models_3[0]
selector = RFECV(estimator, n_jobs = -1)
selector = selector.fit(X_train, y_train)
clf = SGDClassifier()
clf.fit(X_train[selector.get_feature_names_out()], y_train)
y_pred = clf.predict(X_test[selector.get_feature_names_out()])
pickle.dump(clf, file = open('./models/3_class_rfe_sgd.pkl', 'wb'))
print("Total samples: "+ str(len(X_test)))

print('Test accuracy:', len(y_pred[y_pred==y_test])/len(y_pred))
print('Classification Report')
print(classification_report(y_test, y_pred))

Total samples: 98
Test accuracy: 0.9183673469387755
Classification Report
              precision    recall  f1-score   support

      comrav       0.95      0.91      0.93        44
      eucdov       0.86      0.90      0.88        21
     mallar3       0.91      0.94      0.93        33

    accuracy                           0.92        98
   macro avg       0.91      0.92      0.91        98
weighted avg       0.92      0.92      0.92        98



### 5_classes on SDG

In [245]:
from sklearn.feature_selection import RFE, RFECV
# from sklearn.svm import SVR
encoder = LabelEncoder()
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(np.array(class_5_data.iloc[:, 2:], dtype = float)), columns = class_5_data.columns[2:])
y = class_5_data['label']
encoded_labels = encoder.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=40)
estimator = trained_models_3[0]
selector = RFECV(estimator, n_jobs = -1)
selector = selector.fit(X_train, y_train)
clf = SGDClassifier()
clf.fit(X_train[selector.get_feature_names_out()], y_train)
y_pred = clf.predict(X_test[selector.get_feature_names_out()])
print("Total samples: "+ str(len(X_test)))
pickle.dump(clf, file = open('./models/5_class_rfe_sgd.pkl', 'wb'))
print('Test accuracy:', len(y_pred[y_pred==y_test])/len(y_pred))

# print('Confusion Matrix')
# print(confusion_matrix(y_test, y_pred))
print('Classification Report')
# f = open('dup_rfe.txt', 'w')
print(classification_report(y_test, y_pred))
# f.close()
# print(classification_report(y_test, y_pred))

Total samples: 172
Test accuracy: 0.7267441860465116
Classification Report
              precision    recall  f1-score   support

      barswa       0.55      0.39      0.45        31
      comrav       0.93      0.84      0.88        44
      eucdov       0.67      0.95      0.78        21
      houwre       0.61      0.72      0.66        43
     mallar3       0.86      0.76      0.81        33

    accuracy                           0.73       172
   macro avg       0.72      0.73      0.72       172
weighted avg       0.73      0.73      0.72       172

