In [27]:
import librosa, librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy
import math
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [28]:
# Load the feature table from a CSV file in the current working directory.
# NOTE(review): the cell output below echoes this source line, which is how
# Python formats a warning raised here; the warning text itself is not
# visible. If it is a pandas DtypeWarning (mixed-type columns), consider
# passing explicit dtypes or low_memory=False -- TODO confirm.
df = pd.read_csv('MLNS_05292024.csv')

  df = pd.read_csv('MLNS_05292024.csv')


In [29]:
def truncate_mfcc(df, avg_depth=20, var_depth=10, hs_avg_depth=20, hs_var_depth=10, calvin_Features=True):
    """Return the feature columns of ``df`` truncated to the requested depths.

    Columns are selected in this order:

    * ``mfcc_<n>_avg``    for n in ``range(avg_depth)``
    * ``mfcc_<n>_var``    for n in ``range(var_depth)``
    * ``hs_mfcc_<n>_avg`` for n in ``range(hs_avg_depth)``
    * ``hs_mfcc_<n>_var`` for n in ``range(hs_var_depth)``
    * ``main_freq``, ``range``, ``max_mean`` (Calvin's three features) when
      ``calvin_Features`` is true

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns named above.
    avg_depth, var_depth, hs_avg_depth, hs_var_depth : int
        How many coefficients of each feature family to keep.
    calvin_Features : bool
        Whether to append the three extra columns.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the selected columns, with ``df``'s index.
        When nothing is selected the result is an empty-column DataFrame.

    Raises
    ------
    KeyError
        If any requested column is missing from ``df``.
    """
    # Build the full column list once and select in a single step, rather
    # than concatenating one column at a time onto a placeholder column.
    selected = [f'mfcc_{n}_avg' for n in range(avg_depth)]
    selected += [f'mfcc_{n}_var' for n in range(var_depth)]
    selected += [f'hs_mfcc_{n}_avg' for n in range(hs_avg_depth)]
    selected += [f'hs_mfcc_{n}_var' for n in range(hs_var_depth)]
    if calvin_Features:
        selected += ['main_freq', 'range', 'max_mean']
    return df[selected]

In [74]:
# Maps each family/subfamily label (the values seen in df['fam_or_subfam'])
# to a coarse critter label.
# NOTE(review): the katydid label is spelled 'kaydid' throughout. It is kept
# as-is here because, in the code visible in this notebook, mapped labels are
# only ever compared with other mapped labels; confirm the spelling used by
# df['critter_name'] before renaming it.
fam_dict = {
    'Gryllinae': 'cricket',
    'Conocephalinae': 'kaydid',
    'Oecanthinae': 'cricket',
    'Phaneropterinae': 'kaydid',
    'Trigonidiinae': 'cricket',
    'Nemobiinae': 'cricket',
    'Hapithinae': 'cricket',
    'Mogoplistinae': 'cricket',
    'Tettigoniinae': 'kaydid',
    'Pseudophyllinae': 'kaydid',
    'Cicadidae': 'cicada',
    'Gryllotalpidae': 'cricket',
    'Eneopterinae': 'cricket',
    'Phalangopsidae': 'cricket',
    # Listroscelidinae is a subfamily of Tettigoniidae (katydids), not a
    # cricket group, so it shares the katydid label.
    'Listroscelidinae': 'kaydid',
}

def knn_test_acc(df, k=5, avg_depth=20, var_depth=10, hs_avg_depth=20, hs_var_depth=10, calvin_Features=True, crit=False):
    """Return the held-out accuracy of a scaled k-nearest-neighbours classifier.

    Features come from ``truncate_mfcc`` with the given depths and
    ``calvin_Features`` flag. The target is ``df['critter_name']`` when
    ``crit`` is true, otherwise ``df['fam_or_subfam']``. The data is split
    80/20 (shuffled, stratified on the target, ``random_state=17``), a
    ``StandardScaler`` + ``KNeighborsClassifier(k)`` pipeline is fitted on the
    training part, and ``accuracy_score`` on the test part is returned.
    """
    features = truncate_mfcc(
        df=df,
        avg_depth=avg_depth,
        var_depth=var_depth,
        hs_avg_depth=hs_avg_depth,
        hs_var_depth=hs_var_depth,
        calvin_Features=calvin_Features,
    )
    target = df['critter_name'] if crit else df['fam_or_subfam']

    train_X, test_X, train_y, test_y = train_test_split(
        features.copy(),
        target,
        shuffle=True,
        random_state=17,
        test_size=.2,
        stratify=target,
    )

    classifier = Pipeline([
        ('scale', StandardScaler()),
        ('knn', KNeighborsClassifier(k)),
    ])
    classifier.fit(train_X, train_y)
    return accuracy_score(test_y, classifier.predict(test_X))

def SVC_poly_test_acc(df, degree=2, C=10, avg_depth=20, var_depth=10, hs_avg_depth=20, hs_var_depth=10, calvin_Features=True, crit=False):
    """Return the held-out accuracy of a scaled polynomial-kernel SVM.

    Features come from ``truncate_mfcc`` with the given depths and
    ``calvin_Features`` flag. The target is ``df['critter_name']`` when
    ``crit`` is true, otherwise ``df['fam_or_subfam']``. The data is split
    80/20 (shuffled, stratified on the target, ``random_state=17``), a
    ``StandardScaler`` + ``SVC(kernel='poly', degree=degree, C=C)`` pipeline
    is fitted on the training part, and ``accuracy_score`` on the test part
    is returned.
    """
    features = truncate_mfcc(
        df=df,
        avg_depth=avg_depth,
        var_depth=var_depth,
        hs_avg_depth=hs_avg_depth,
        hs_var_depth=hs_var_depth,
        calvin_Features=calvin_Features,
    )
    target = df['critter_name'] if crit else df['fam_or_subfam']

    train_X, test_X, train_y, test_y = train_test_split(
        features.copy(),
        target,
        shuffle=True,
        random_state=17,
        test_size=.2,
        stratify=target,
    )

    poly_svm = Pipeline([
        ('scale', StandardScaler()),
        ('svc_poly', SVC(kernel='poly', degree=degree, C=C)),
    ])
    poly_svm.fit(train_X, train_y)
    return accuracy_score(test_y, poly_svm.predict(test_X))

def SVC_rbf_test_acc(df, C=10, avg_depth=20, var_depth=10, hs_avg_depth=20, hs_var_depth=10, calvin_Features=True, crit=False):
    """Return the held-out accuracy of a scaled RBF-kernel SVM.

    Features come from ``truncate_mfcc`` with the given depths and
    ``calvin_Features`` flag. The target is ``df['critter_name']`` when
    ``crit`` is true, otherwise ``df['fam_or_subfam']``. The data is split
    80/20 (shuffled, stratified on the target, ``random_state=17``), a
    ``StandardScaler`` + ``SVC(kernel='rbf', C=C)`` pipeline is fitted on the
    training part, and ``accuracy_score`` on the test part is returned.
    """
    features = truncate_mfcc(
        df=df,
        avg_depth=avg_depth,
        var_depth=var_depth,
        hs_avg_depth=hs_avg_depth,
        hs_var_depth=hs_var_depth,
        calvin_Features=calvin_Features,
    )
    target = df['critter_name'] if crit else df['fam_or_subfam']

    train_X, test_X, train_y, test_y = train_test_split(
        features.copy(),
        target,
        shuffle=True,
        random_state=17,
        test_size=.2,
        stratify=target,
    )

    rbf_svm = Pipeline([
        ('scale', StandardScaler()),
        ('svc_rbf', SVC(kernel='rbf', C=C)),
    ])
    rbf_svm.fit(train_X, train_y)
    return accuracy_score(test_y, rbf_svm.predict(test_X))

def SVC_rbf_test_acc_on_crit(df, C=10, avg_depth=20, var_depth=10, hs_avg_depth=20, hs_var_depth=10, calvin_Features=True, crit=False):
    """Return critter-level accuracy of a scaled RBF-kernel SVM.

    Features come from ``truncate_mfcc`` with the given depths and
    ``calvin_Features`` flag. The data is split 80/20 (shuffled, stratified
    on the target, ``random_state=17``) and a ``StandardScaler`` +
    ``SVC(kernel='rbf', C=C)`` pipeline is fitted on the training part.

    With ``crit=False`` (the default) the model is trained on
    ``df['fam_or_subfam']``; true and predicted labels are then both mapped
    through ``fam_to_crit`` and the accuracy of the mapped labels is
    returned, so confusing two families that share a critter label counts as
    correct.

    With ``crit=True`` the model is trained on ``df['critter_name']`` and the
    accuracy is computed on those labels directly.
    """
    X = truncate_mfcc(df=df, avg_depth=avg_depth, var_depth=var_depth, hs_avg_depth=hs_avg_depth,
                      hs_var_depth=hs_var_depth, calvin_Features=calvin_Features)
    Y = df['critter_name'] if crit else df['fam_or_subfam']
    X_train, X_test, y_train, y_test = train_test_split(
        X.copy(), Y, shuffle=True, random_state=17, test_size=.2, stratify=Y)

    svc_pipe = Pipeline([('scale', StandardScaler()), ('svc_rbf', SVC(kernel='rbf', C=C))])
    svc_pipe.fit(X_train, y_train)
    pred = svc_pipe.predict(X_test)

    if crit:
        # The labels are already at critter level. fam_to_crit looks names up
        # in a table keyed by family/subfamily, so passing critter names
        # through it would presumably raise KeyError.
        return accuracy_score(y_test, pred)

    # y_test keeps the row labels of df after the split; reset it to a
    # 0..n-1 index so it lines up with the freshly built prediction Series.
    true_crit = fam_to_crit(y_test.reset_index(drop=True))
    pred_crit = fam_to_crit(pd.Series(pred))
    return accuracy_score(true_crit, pred_crit)

def fam_to_crit(series):
    """Convert a Series of family/subfamily names into critter names.

    Each entry is translated with ``fam_to_crit_string``. The input is not
    modified; the result is a new Series with the same index and name.
    Works for any index, not only the default 0..n-1 one.

    Raises
    ------
    KeyError
        If an entry is not a known family/subfamily name.
    """
    # Series.map applies the lookup element-wise and keeps the index, so no
    # positional/label indexing assumptions are needed.
    return series.map(fam_to_crit_string)

def fam_to_crit_string(fam_name):
    """Return the critter label that ``fam_dict`` stores for ``fam_name``.

    Raises KeyError when ``fam_name`` is not a key of ``fam_dict``.
    """
    critter = fam_dict[fam_name]
    return critter

In [47]:
# Spot-check a single entry of the family -> critter lookup table.
fam_dict['Gryllinae']

'cricket'

In [43]:
# Number of rows per family/subfamily label.
print(df['fam_or_subfam'].value_counts())

fam_or_subfam
Gryllinae           1419
Conocephalinae       847
Oecanthinae          745
Phaneropterinae      624
Trigonidiinae        553
Nemobiinae           386
Hapithinae           380
Mogoplistinae        372
Tettigoniinae        285
Pseudophyllinae      139
Cicadidae             55
Gryllotalpidae        41
Eneopterinae          31
Phalangopsidae        12
Listroscelidinae      10
Name: count, dtype: int64

In [31]:
# Sweep knn over a shared mfcc depth (the same depth is used for all four
# mfcc feature groups) and over the neighbour count k, with Calvin's features
# included and fam_or_subfam as the target.
# NOTE(review): the settings are compared on the test-split accuracy itself;
# consider cross-validation on the training data for model selection.
for depth in range(12, 16):
    for k in range(1, 11):
        print(
            f"knn Test Accuracy when mfcc depth = {depth} and k = {k}: "
            f"{knn_test_acc(df, k, depth, depth, depth, depth, calvin_Features=True, crit=False)}"
        )

knn Test Accuracy when mfcc depth = 12 and k = 1: 0.6415254237288136
knn Test Accuracy when mfcc depth = 12 and k = 2: 0.6194915254237288
knn Test Accuracy when mfcc depth = 12 and k = 3: 0.6372881355932203
knn Test Accuracy when mfcc depth = 12 and k = 4: 0.6483050847457628
knn Test Accuracy when mfcc depth = 12 and k = 5: 0.6720338983050848
knn Test Accuracy when mfcc depth = 12 and k = 6: 0.6474576271186441
knn Test Accuracy when mfcc depth = 12 and k = 7: 0.6516949152542373
knn Test Accuracy when mfcc depth = 12 and k = 8: 0.6415254237288136
knn Test Accuracy when mfcc depth = 12 and k = 9: 0.6457627118644068
knn Test Accuracy when mfcc depth = 12 and k = 10: 0.6423728813559322
knn Test Accuracy when mfcc depth = 13 and k = 1: 0.6398305084745762
knn Test Accuracy when mfcc depth = 13 and k = 2: 0.6194915254237288
knn Test Accuracy when mfcc depth = 13 and k = 3: 0.6491525423728813
knn Test Accuracy when mfcc depth = 13 and k = 4: 0.6449152542372881
knn Test Accuracy when mfcc depth

In [69]:
# Sweep the polynomial-kernel SVC over degree, C and a shared mfcc depth (the
# same depth is used for all four mfcc feature groups), with Calvin's
# features included and fam_or_subfam as the target.
for degree in range(2, 4):
    for C in range(9, 11):
        for depth in range(13, 15):
            print(
                f"SVC poly Test Accuracy when mfcc degree = {degree} and C = {C} and depth = {depth}: "
                f"{SVC_poly_test_acc(df, degree, C, depth, depth, depth, depth, calvin_Features=True, crit=False)}"
            )

SVC poly Test Accuracy when mfcc degree = 2 and C = 9 and depth = 13: 0.6567796610169492
SVC poly Test Accuracy when mfcc degree = 2 and C = 9 and depth = 14: 0.652542372881356
SVC poly Test Accuracy when mfcc degree = 2 and C = 10 and depth = 13: 0.6567796610169492
SVC poly Test Accuracy when mfcc degree = 2 and C = 10 and depth = 14: 0.6550847457627119
SVC poly Test Accuracy when mfcc degree = 3 and C = 9 and depth = 13: 0.6652542372881356
SVC poly Test Accuracy when mfcc degree = 3 and C = 9 and depth = 14: 0.6728813559322034
SVC poly Test Accuracy when mfcc degree = 3 and C = 10 and depth = 13: 0.6694915254237288
SVC poly Test Accuracy when mfcc degree = 3 and C = 10 and depth = 14: 0.6711864406779661


In [76]:
# Sweep the RBF-kernel SVC over C and a shared mfcc depth (the same depth is
# used for all four mfcc feature groups), with Calvin's features included and
# fam_or_subfam as the target.
for C in range(10, 11):
    for depth in range(10, 20):
        print(
            f"SVC rbf Test Accuracy when mfcc depth = {depth} and C = {C}: "
            f"{SVC_rbf_test_acc(df, C, depth, depth, depth, depth, calvin_Features=True, crit=False)}"
        )

SVC rbf Test Accuracy when mfcc depth = 10 and C = 10: 0.690677966101695
SVC rbf Test Accuracy when mfcc depth = 11 and C = 10: 0.6889830508474576
SVC rbf Test Accuracy when mfcc depth = 12 and C = 10: 0.7008474576271186
SVC rbf Test Accuracy when mfcc depth = 13 and C = 10: 0.7067796610169491
SVC rbf Test Accuracy when mfcc depth = 14 and C = 10: 0.6983050847457627
SVC rbf Test Accuracy when mfcc depth = 15 and C = 10: 0.688135593220339
SVC rbf Test Accuracy when mfcc depth = 16 and C = 10: 0.6830508474576271
SVC rbf Test Accuracy when mfcc depth = 17 and C = 10: 0.6830508474576271
SVC rbf Test Accuracy when mfcc depth = 18 and C = 10: 0.6788135593220339
SVC rbf Test Accuracy when mfcc depth = 19 and C = 10: 0.6940677966101695


In [72]:
# Same sweep as for the RBF-kernel SVC on fam_or_subfam (C and a shared mfcc
# depth, Calvin's features included), but scoring the predictions at
# critter_name level.
for C in range(10, 11):
    for depth in range(10, 20):
        print(
            f"SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = {depth} and C = {C}: "
            f"{SVC_rbf_test_acc_on_crit(df, C, depth, depth, depth, depth, calvin_Features=True, crit=False)}"
        )

SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = 10 and C = 10: 0.9220338983050848
SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = 11 and C = 10: 0.9211864406779661
SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = 12 and C = 10: 0.923728813559322
SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = 13 and C = 10: 0.9228813559322034
SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = 14 and C = 10: 0.9245762711864407
SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = 15 and C = 10: 0.9279661016949152
SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = 16 and C = 10: 0.926271186440678
SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = 17 and C = 10: 0.9305084745762712
SVC rbf Test Accuracy (train fam_or_subfam, predict crit_name) when mfcc depth = 1