In [74]:
import librosa, librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy
import math
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [68]:
#Load the table of MFCC statistics and discard the row labelled 33 (the grasshopper)
df = pd.read_csv('MLNS_with_mfcc_stats_05242024.csv').drop(index=33)

  df = pd.read_csv('MLNS_with_mfcc_stats_05242024.csv')


In [69]:
#How many rows there are for each critter name
df.loc[:, 'critter_name'].value_counts()

critter_name
cricket    3976
kaydid     1926
cicada       57
Name: count, dtype: int64

In [70]:
#Encode each critter name as a numeric class label in a new 'critter_num' column.
#Any name missing from the mapping becomes NaN; the column is kept as float.
df['critter_num'] = (
    df['critter_name']
    .map({'cricket': 0, 'kaydid': 1, 'cicada': 2})
    .astype('float64')
)

In [71]:
#Sanity check: the numeric labels should have the same counts as the names
df.loc[:, 'critter_num'].value_counts()

critter_num
0.0    3976
1.0    1926
2.0      57
Name: count, dtype: int64

In [72]:
#Two-class subset for later use: keep every row whose label is not 2 (cricket and kaydid only)
df_bin = df.loc[df['critter_num'] != 2].copy()

In [41]:
def truncate_mfcc(df, avg_depth=20, var_depth=10):
    """Return the MFCC feature columns of ``df`` truncated to the given depths.

    Selects ``mfcc_0_avg`` ... ``mfcc_{avg_depth-1}_avg`` followed by
    ``mfcc_0_var`` ... ``mfcc_{var_depth-1}_var``, in that order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``mfcc_<n>_avg`` / ``mfcc_<n>_var`` columns.
    avg_depth : int, default 20
        Number of leading ``_avg`` columns to keep.
    var_depth : int, default 10
        Number of leading ``_var`` columns to keep.

    Returns
    -------
    pandas.DataFrame
        A new frame (independent copy) with the selected columns and the same
        index as ``df``. With both depths equal to 0 the result is a frame with
        no columns.

    Raises
    ------
    KeyError
        If a requested column is not present in ``df``.
    """
    avg_cols = ['mfcc_'+str(n)+'_avg' for n in range(0, avg_depth)]
    var_cols = ['mfcc_'+str(n)+'_var' for n in range(0, var_depth)]
    # One list-based selection replaces the repeated pd.concat calls and the
    # placeholder column that had to be dropped afterwards.
    return df[avg_cols + var_cols].copy()

In [61]:
#Three-class LDA inputs: labels for every critter, plus 80 MFCC features
#(the first 40 mfcc_*_avg columns and the first 40 mfcc_*_var columns)
Y = df['critter_num']
X = truncate_mfcc(df, var_depth=40, avg_depth=40)

In [62]:
#Stratified, shuffled 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X.copy(), Y,
    test_size=.2,
    stratify=Y,
    random_state=413,
    shuffle=True,
)

In [64]:
## Create a linear discriminant classifier with default settings
LDA = LinearDiscriminantAnalysis()

## Train it on the training split
LDA.fit(X=X_train, y=y_train)

In [66]:
#Confusion matrix on the test split (rows: actual class, columns: predicted class)
conf_mat = confusion_matrix(y_true=y_test, y_pred=LDA.predict(X_test))

In [67]:
#Label the confusion matrix so it reads as a table
pd.DataFrame(
    conf_mat,
    index=['Actual 0', 'Actual 1', 'Actual 2'],
    columns=['Predicted 0', 'Predicted 1', 'Predicted 2'],
)

Unnamed: 0,Predicted 0,Predicted 1,Predicted 2
Actual 0,735,57,3
Actual 1,95,287,3
Actual 2,6,1,5


In [52]:
#Two-class LDA inputs (labels 0 and 1 only): 80 MFCC features
#(the first 40 mfcc_*_avg columns and the first 40 mfcc_*_var columns)
Y = df_bin['critter_num']
X = truncate_mfcc(df_bin, var_depth=40, avg_depth=40)

In [53]:
#Stratified, shuffled 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X.copy(), Y,
    test_size=.2,
    stratify=Y,
    random_state=413,
    shuffle=True,
)

In [54]:
## Create a fresh linear discriminant classifier with default settings
LDA = LinearDiscriminantAnalysis()

## Train it on the two-class training split
LDA.fit(X=X_train, y=y_train)

In [55]:
#Confusion matrix on the training split (rows: actual class, columns: predicted class)
conf_mat = confusion_matrix(y_true=y_train, y_pred=LDA.predict(X_train))

In [56]:
#Label the training confusion matrix so it reads as a table
pd.DataFrame(
    conf_mat,
    index=['Actual 0', 'Actual 1'],
    columns=['Predicted 0', 'Predicted 1'],
)

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,2983,197
Actual 1,326,1215


In [57]:
#Confusion matrix on the test split (rows: actual class, columns: predicted class)
conf_mat = confusion_matrix(y_true=y_test, y_pred=LDA.predict(X_test))

In [58]:
#Label the test confusion matrix so it reads as a table
pd.DataFrame(
    conf_mat,
    index=['Actual 0', 'Actual 1'],
    columns=['Predicted 0', 'Predicted 1'],
)

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,737,59
Actual 1,96,289


In [116]:
#KNN inputs: labels for every critter, plus 20 MFCC features
#(the first 10 mfcc_*_avg columns and the first 10 mfcc_*_var columns)
Y = df['critter_num']
X = truncate_mfcc(df, var_depth=10, avg_depth=10)

In [117]:
#Stratified, shuffled 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X.copy(), Y,
    test_size=.2,
    stratify=Y,
    random_state=17,
    shuffle=True,
)

In [118]:
## Build the model: standardize the features, then classify with 5 nearest neighbors
knn_pipe = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
])

## Fit the whole pipeline on the training split
knn_pipe.fit(X_train, y_train)

## Predict on the held-out test split
pred = knn_pipe.predict(X_test)

In [123]:
## We define it by hand here
## but you can also use accuracy_score from sklearn
## https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
def accuracy(true, predicted):
    """Return the fraction of positions where ``predicted`` equals ``true``.

    Both inputs are converted to NumPy arrays first, so the comparison is
    always element-wise and positional. This means plain Python lists work
    (``list == list`` would otherwise collapse to a single bool) and pandas
    index labels are ignored rather than aligned.

    Parameters
    ----------
    true : array-like
        Ground-truth labels.
    predicted : array-like
        Predicted labels, same shape as ``true``.

    Returns
    -------
    numpy.float64
        Number of matching positions divided by ``len(predicted)``;
        NaN when the inputs are empty.

    Raises
    ------
    ValueError
        If ``true`` and ``predicted`` do not have the same shape.
    """
    true_arr = np.asarray(true)
    pred_arr = np.asarray(predicted)
    if true_arr.shape != pred_arr.shape:
        raise ValueError(
            f"true and predicted must have the same shape, got {true_arr.shape} and {pred_arr.shape}"
        )
    if pred_arr.size == 0:
        # Nothing to score; avoid a 0/0 division warning
        return np.float64(np.nan)
    return np.sum(true_arr == pred_arr)/len(pred_arr)

In [138]:
# Sweep the MFCC depth (same depth for the _avg and _var columns) and the number
# of neighbors k, printing the test accuracy of a scaled KNN for each combination.
# NOTE(review): the settings are being compared on the test split; consider
# cross-validating on the training split so the final test score stays unbiased.
Y = df['critter_num']
for depth in range(20,25):
    # The features and the split depend only on depth (the seed is fixed),
    # so build them once per depth instead of once per (depth, k) pair.
    X=truncate_mfcc(df, avg_depth=depth, var_depth=depth)
    X_train, X_test, y_train, y_test = train_test_split(X.copy(), Y, shuffle=True, random_state=17, test_size=.2, stratify=Y)
    for k in range(1,30):
        knn_pipe = Pipeline([('scale', StandardScaler()), ('knn', KNeighborsClassifier(k))])
        knn_pipe.fit(X_train, y_train)
        pred = knn_pipe.predict(X_test)
        print(f"Test Accuracy when mfcc depth = {depth} and k = {k}: {accuracy(y_test, pred)}")

Test Accuracy when mfcc depth = 20 and k = 1: 0.7449664429530202
Test Accuracy when mfcc depth = 20 and k = 2: 0.7676174496644296
Test Accuracy when mfcc depth = 20 and k = 3: 0.7692953020134228
Test Accuracy when mfcc depth = 20 and k = 4: 0.7802013422818792
Test Accuracy when mfcc depth = 20 and k = 5: 0.7827181208053692
Test Accuracy when mfcc depth = 20 and k = 6: 0.7843959731543624
Test Accuracy when mfcc depth = 20 and k = 7: 0.7835570469798657
Test Accuracy when mfcc depth = 20 and k = 8: 0.7818791946308725
Test Accuracy when mfcc depth = 20 and k = 9: 0.785234899328859
Test Accuracy when mfcc depth = 20 and k = 10: 0.7760067114093959
Test Accuracy when mfcc depth = 20 and k = 11: 0.7734899328859061
Test Accuracy when mfcc depth = 20 and k = 12: 0.7776845637583892
Test Accuracy when mfcc depth = 20 and k = 13: 0.7785234899328859
Test Accuracy when mfcc depth = 20 and k = 14: 0.7810402684563759
Test Accuracy when mfcc depth = 20 and k = 15: 0.7827181208053692
Test Accuracy when m

KeyboardInterrupt: 