In [1]:
import librosa, librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os
import scipy
import math
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [2]:
# Load the precomputed feature table that every later cell works from. The cells below read the
# columns mfcc_<n>_avg / mfcc_<n>_var, hs_mfcc_<n>_avg / hs_mfcc_<n>_var, main_freq, range and
# max_mean as features, and critter_name / fam_or_subfam as labels.
# NOTE(review): the saved output of this cell echoes this source line, which looks like the tail of
# a pandas warning (possibly a DtypeWarning about mixed column types) -- confirm, and if so pass an
# explicit dtype= to read_csv.
df = pd.read_csv('MLNS_05282024B.csv')

  df = pd.read_csv('MLNS_05282024B.csv')


In [3]:
def truncate_mfcc(df, avg_depth=20, var_depth=10, hs_avg_depth=20, hs_var_depth=10, calvin_Features=True):
    """Return the feature columns of ``df`` truncated to the requested depths.

    Columns are selected in this order:

    1. ``mfcc_<n>_avg``    for ``n`` in ``range(avg_depth)``
    2. ``mfcc_<n>_var``    for ``n`` in ``range(var_depth)``
    3. ``hs_mfcc_<n>_avg`` for ``n`` in ``range(hs_avg_depth)``
    4. ``hs_mfcc_<n>_var`` for ``n`` in ``range(hs_var_depth)``
    5. ``main_freq``, ``range``, ``max_mean`` (Calvin's three features),
       only when ``calvin_Features`` is true.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns named above.
    avg_depth, var_depth, hs_avg_depth, hs_var_depth : int
        Number of leading coefficients to keep from each feature group.
        A depth of 0 keeps nothing from that group.
    calvin_Features : bool
        Whether to append ``main_freq``, ``range`` and ``max_mean``.

    Returns
    -------
    pandas.DataFrame
        A new frame (independent of ``df``) with the selected columns and
        ``df``'s index. It has zero columns when nothing is requested.

    Raises
    ------
    KeyError
        If a requested column is not present in ``df``.
    """
    # (column prefix, statistic suffix, how many coefficients to keep), in output order.
    groups = (
        ('mfcc', 'avg', avg_depth),
        ('mfcc', 'var', var_depth),
        ('hs_mfcc', 'avg', hs_avg_depth),
        ('hs_mfcc', 'var', hs_var_depth),
    )
    columns = [f'{prefix}_{n}_{stat}' for prefix, stat, depth in groups for n in range(depth)]
    if calvin_Features:
        columns += ['main_freq', 'range', 'max_mean']

    # Select everything in one go instead of concatenating column by column; this needs no
    # placeholder column and always yields a DataFrame, even for an empty selection.
    return df[columns].copy()

In [4]:
# Fixed split settings shared by every classifier below, so their scores are comparable.
_SPLIT_SEED = 17
_TEST_FRACTION = .2


def _mfcc_test_accuracy(df, classifier_step, avg_depth, var_depth, hs_avg_depth, hs_var_depth, calvin_Features, crit):
    """Train ``classifier_step`` on truncated MFCC features and return its test accuracy.

    Shared implementation behind ``knn_test_acc``, ``SVC_poly_test_acc`` and
    ``SVC_rbf_test_acc``; those functions differ only in the classifier they pass in.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table; must hold the columns ``truncate_mfcc`` selects plus the label column.
    classifier_step : tuple
        ``(name, estimator)`` pair appended to the pipeline after the scaler.
    avg_depth, var_depth, hs_avg_depth, hs_var_depth, calvin_Features
        Forwarded unchanged to ``truncate_mfcc``.
    crit : bool
        If true the label is ``critter_name``, otherwise ``fam_or_subfam``.

    Returns
    -------
    float
        ``accuracy_score`` of the fitted pipeline on the held-out 20% split.
    """
    X = truncate_mfcc(df=df, avg_depth=avg_depth, var_depth=var_depth, hs_avg_depth=hs_avg_depth,
                      hs_var_depth=hs_var_depth, calvin_Features=calvin_Features)
    Y = df['critter_name'] if crit else df['fam_or_subfam']
    # Stratified, seeded split: every call sees the same train/test partition.
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, shuffle=True, random_state=_SPLIT_SEED, test_size=_TEST_FRACTION, stratify=Y)
    # The scaler lives inside the pipeline, so it is fitted on the training split only.
    model = Pipeline([('scale', StandardScaler()), classifier_step])
    model.fit(X_train, y_train)
    return accuracy_score(y_test, model.predict(X_test))


def knn_test_acc(df, k=5, avg_depth=20, var_depth=10, hs_avg_depth=20, hs_var_depth=10, calvin_Features=True, crit=False):
    """Test accuracy of k-nearest-neighbours with ``k`` neighbours.

    The depth arguments and ``calvin_Features`` choose the feature columns (see
    ``truncate_mfcc``); ``crit`` selects ``critter_name`` over ``fam_or_subfam`` as the label.
    """
    return _mfcc_test_accuracy(df, ('knn', KNeighborsClassifier(n_neighbors=k)),
                               avg_depth, var_depth, hs_avg_depth, hs_var_depth, calvin_Features, crit)


def SVC_poly_test_acc(df, degree=2, C=10, avg_depth=20, var_depth=10, hs_avg_depth=20, hs_var_depth=10, calvin_Features=True, crit=False):
    """Test accuracy of a polynomial-kernel SVM with the given ``degree`` and ``C``.

    The depth arguments and ``calvin_Features`` choose the feature columns (see
    ``truncate_mfcc``); ``crit`` selects ``critter_name`` over ``fam_or_subfam`` as the label.
    """
    return _mfcc_test_accuracy(df, ('svc_poly', SVC(kernel='poly', degree=degree, C=C)),
                               avg_depth, var_depth, hs_avg_depth, hs_var_depth, calvin_Features, crit)


def SVC_rbf_test_acc(df, C=10, avg_depth=20, var_depth=10, hs_avg_depth=20, hs_var_depth=10, calvin_Features=True, crit=False):
    """Test accuracy of an RBF-kernel SVM with regularisation parameter ``C``.

    The depth arguments and ``calvin_Features`` choose the feature columns (see
    ``truncate_mfcc``); ``crit`` selects ``critter_name`` over ``fam_or_subfam`` as the label.
    """
    return _mfcc_test_accuracy(df, ('svc_rbf', SVC(kernel='rbf', C=C)),
                               avg_depth, var_depth, hs_avg_depth, hs_var_depth, calvin_Features, crit)

In [5]:
# Sweep kNN over one shared truncation depth (used for all four MFCC feature groups) and over k,
# with Calvin's features included and fam_or_subfam as the label.
# NOTE(review): knn_test_acc scores on the held-out test split, so choosing the best (depth, k)
# from this printout gives an optimistic accuracy estimate.
for depth in range(12, 16):
    for k in range(1, 11):
        acc = knn_test_acc(df, k=k, avg_depth=depth, var_depth=depth, hs_avg_depth=depth,
                           hs_var_depth=depth, calvin_Features=True, crit=False)
        print(f"knn Test Accuracy when mfcc depth = {depth} and k = {k}: {acc}")

knn Test Accuracy when mfcc depth = 12 and k = 1: 0.6474576271186441
knn Test Accuracy when mfcc depth = 12 and k = 2: 0.6220338983050847
knn Test Accuracy when mfcc depth = 12 and k = 3: 0.6296610169491526
knn Test Accuracy when mfcc depth = 12 and k = 4: 0.638135593220339
knn Test Accuracy when mfcc depth = 12 and k = 5: 0.6432203389830509
knn Test Accuracy when mfcc depth = 12 and k = 6: 0.6432203389830509
knn Test Accuracy when mfcc depth = 12 and k = 7: 0.6466101694915254
knn Test Accuracy when mfcc depth = 12 and k = 8: 0.6372881355932203
knn Test Accuracy when mfcc depth = 12 and k = 9: 0.6474576271186441
knn Test Accuracy when mfcc depth = 12 and k = 10: 0.6364406779661017
knn Test Accuracy when mfcc depth = 13 and k = 1: 0.6262711864406779
knn Test Accuracy when mfcc depth = 13 and k = 2: 0.6101694915254238
knn Test Accuracy when mfcc depth = 13 and k = 3: 0.6296610169491526
knn Test Accuracy when mfcc depth = 13 and k = 4: 0.6389830508474577
knn Test Accuracy when mfcc depth 

In [10]:
# Sweep the polynomial-kernel SVC over kernel degree, C, and one shared truncation depth (used for
# all four MFCC feature groups), with Calvin's features included and fam_or_subfam as the label.
# NOTE(review): SVC_poly_test_acc scores on the held-out test split, so choosing the best setting
# from this printout gives an optimistic accuracy estimate.
for degree in range(2, 4):
    for C in range(9, 11):
        for depth in range(13, 15):
            acc = SVC_poly_test_acc(df, degree=degree, C=C, avg_depth=depth, var_depth=depth,
                                    hs_avg_depth=depth, hs_var_depth=depth,
                                    calvin_Features=True, crit=False)
            print(f"SVC poly Test Accuracy when mfcc degree = {degree} and C = {C} and depth = {depth}: {acc}")

SVC poly Test Accuracy when mfcc degree = 2 and C = 9 and depth = 13: 0.65
SVC poly Test Accuracy when mfcc degree = 2 and C = 9 and depth = 14: 0.6474576271186441
SVC poly Test Accuracy when mfcc degree = 2 and C = 10 and depth = 13: 0.6483050847457628
SVC poly Test Accuracy when mfcc degree = 2 and C = 10 and depth = 14: 0.6491525423728813
SVC poly Test Accuracy when mfcc degree = 3 and C = 9 and depth = 13: 0.6576271186440678
SVC poly Test Accuracy when mfcc degree = 3 and C = 9 and depth = 14: 0.6635593220338983
SVC poly Test Accuracy when mfcc degree = 3 and C = 10 and depth = 13: 0.6584745762711864
SVC poly Test Accuracy when mfcc degree = 3 and C = 10 and depth = 14: 0.6711864406779661


In [8]:
# Sweep the RBF-kernel SVC over C and one shared truncation depth (used for all four MFCC feature
# groups), with Calvin's features included and fam_or_subfam as the label.
# NOTE(review): SVC_rbf_test_acc scores on the held-out test split, so choosing the best C from
# this printout gives an optimistic accuracy estimate.
for C in range(5, 13):
    for depth in range(14, 15):
        acc = SVC_rbf_test_acc(df, C=C, avg_depth=depth, var_depth=depth, hs_avg_depth=depth,
                               hs_var_depth=depth, calvin_Features=True, crit=False)
        print(f"SVC rbf Test Accuracy when mfcc depth = {depth} and C = {C}: {acc}")

SVC rbf Test Accuracy when mfcc depth = 14 and C = 5: 0.7008474576271186
SVC rbf Test Accuracy when mfcc depth = 14 and C = 6: 0.7050847457627119
SVC rbf Test Accuracy when mfcc depth = 14 and C = 7: 0.7050847457627119
SVC rbf Test Accuracy when mfcc depth = 14 and C = 8: 0.7008474576271186
SVC rbf Test Accuracy when mfcc depth = 14 and C = 9: 0.7008474576271186
SVC rbf Test Accuracy when mfcc depth = 14 and C = 10: 0.7025423728813559
SVC rbf Test Accuracy when mfcc depth = 14 and C = 11: 0.7033898305084746
SVC rbf Test Accuracy when mfcc depth = 14 and C = 12: 0.7033898305084746
