# Extracting Features for PLS, KNN, and SVC analyses

## Import Packages

In [116]:
import numpy as np
import pandas as pd
import librosa
import os
import glob

## Define functions for first set of features

In [144]:
#function to extract average and variance of 40 mfccs across entire audio clip
def extract_mfccs(song, sr, cutoff= 800, n_mfcc = 40):
    mfccs = librosa.feature.mfcc(y = song, fmin = cutoff, n_mfcc = n_mfcc)
    mfcc_avg = np.mean(mfccs, axis = 1)
    mfcc_var = np.var(mfccs, axis = 1)
    return mfcc_avg, mfcc_var

#function to extract average and variance of 40 mfccs across loudest half second
def extract_hs_mfccs(song, sr, cutoff = 800, n_mfcc = 40):
    max_ampj = np.argmax(abs(song))
    time_start, time_stop = round(max_ampj - sr/4), round(max_ampj + sr/4)
    if time_start < 0:
        time_start, time_stop = 0, round(sr/2)
    if time_stop > len(song):
        time_start, time_stop = len(song) - round(sr/2), len(song)
    mfccs = librosa.feature.mfcc(y = song[time_start:time_stop], fmin = cutoff, n_mfcc = n_mfcc)
    mfccs_hs = librosa.feature.mfcc(y = song[time_start:time_stop], fmin = cutoff, n_mfcc = n_mfcc)
    mfcc_hs_avg = np.mean(mfccs, axis = 1)
    mfcc_hs_var = np.var(mfccs, axis = 1)
    if time_start < 0: 
        return (np.full(20, np.nan), np.full(20, np.nan))
    else: 
        return mfcc_hs_avg, mfcc_hs_var 

# return frequency from file that had the greatest magnitude
def extract_peak_frequency(song, sr, cutoff = 800):
    fft_data = np.fft.fft(song)
    freqs = np.fft.fftfreq(len(song))*sr
    freqs_cut = freqs[freqs > cutoff]
    fft_data_cut = fft_data[freqs > cutoff]
    peak_coefficient = np.argmax(np.abs(fft_data_cut))
    peak_freq = freqs_cut[peak_coefficient]
    return peak_freq

# return frequencies from file that had the mean amplitude and variance
def main_freq(song, cutoff = 800):
    D = np.abs(librosa.stft(song))
    cut = int(float(cutoff)/(librosa.fft_frequencies()[1]))
    D = D[cut:]
    DB = librosa.amplitude_to_db(D, ref=np.max)
    maxmean = np.mean(DB[0,:])
    main_meanfreq = 0
    maxvar = np.var(DB[0,:])
    main_varfreq = 0
    for x in range(1,DB.shape[0]):
        mean = np.mean(DB[x,:])
        var = np.var(DB[x,:])
        if mean > maxmean:
            maxmean = mean
            main_meanfreq = x
        if var > maxvar:
            maxvar = var
            main_varfreq = x
    return main_varfreq * librosa.fft_frequencies()[1], main_meanfreq * librosa.fft_frequencies()[1] #main_freq and max_mean

## Pare down dataset to those files in the folder with voice-less audio

In [140]:
df = pd.read_csv('FauxRecordings_Data.csv')
files_in_df = df.cat_num+'_cut.wav'
path = r'FauxRecordings_NoVoice/*.wav'
files_in_novoice = glob.glob(path)
files_in_novoice = [os.path.basename(file) for file in files_in_novoice]

df2 = df[files_in_df.isin(files_in_novoice)]
df2


Unnamed: 0,cat_num,fam_or_subfam,critter_name
0,FauxRecording0,Grillinae,Cricket
1,FauxRecording1,Tettigoniinae,Katyidid
2,FauxRecording2,Cicadidae,Cicada
4,FauxRecording4,Tettigoniinae,Katydid


## Extract features for each file/row

In [173]:
path = 'FauxRecordings_NoVoice//'
new_df = []
for i,file in enumerate(df2['cat_num']+'_cut.wav'):
    song, sr = librosa.load(path+file)
    mfccs = np.concatenate(np.concatenate(extract_mfccs(song, sr)), np.concatenate(extract_hs_mfccs(song, sr)))
    new_df.append(mfccs)

TypeError: only integer scalar arrays can be converted to a scalar index

In [176]:
a = np.concatenate(extract_hs_mfccs(song, sr))
b = np.concatenate(extract_mfccs(song, sr))
a + b

array([-2.61772583e+02,  1.19398109e+02, -2.68957853e+00,  5.53495407e+01,
       -3.16891022e+01,  3.38817291e+01, -2.98345680e+01,  3.62719269e+01,
       -3.71084938e+01,  4.91259689e+01, -2.28945122e+01,  2.74803829e+00,
        8.96912193e+00,  3.66642523e+00, -7.64650154e+00,  5.22575319e-01,
        1.54313660e+01, -1.51237297e+01,  9.52152252e+00,  1.12711601e+01,
       -1.54597301e+01,  1.39654255e+01, -1.62856197e+01,  7.60196590e+00,
       -4.53901815e+00,  1.17540894e+01, -9.45436478e+00,  2.47037721e+00,
        4.58465576e+00, -3.13295674e+00, -1.22588611e+00,  3.90589619e+00,
       -8.80881310e-01,  7.26289463e+00, -7.96189737e+00,  6.49376774e+00,
       -9.83524513e+00,  6.50472689e+00, -1.03272600e+01,  8.47835541e+00,
        6.25999268e+02,  3.40977417e+02,  6.67057648e+01,  2.69657837e+02,
        4.65035172e+01,  1.80315430e+02,  3.43181091e+02,  7.79029083e+01,
        1.01370628e+02,  1.79391632e+02,  4.39460602e+01,  6.58493347e+01,
        9.49612274e+01,  

In [None]:
new_df = pd.DataFrame(new_df)
column_names = [f'hs_mfcc_{i}_avg' for i in range(0, df.shape[1])] + [f'hs_mfcc_{i}_var' for i in range(0, df.shape[1])]# column names
df.columns = column_names 

#df = pd.DataFrame(mfcc_avg_list) # make the list a data frame
#column_names = [f'hs_mfcc_{i}_avg' for i in range(0, df.shape[1])] # column names
#df.columns = column_names 
#df_clean = df.dropna() # drop NANs
#df_clean

In [161]:
new_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,70,71,72,73,74,75,76,77,78,79
0,-129.43515,62.246742,-6.032647,23.458515,-17.166088,16.810387,-15.857244,17.613438,-18.077768,24.889097,...,16.997343,10.931653,9.343766,9.229485,9.555801,10.141433,9.522658,9.765179,10.365724,10.580957
1,-404.522583,-8.70485,19.790049,-8.003279,2.504146,3.371484,-4.156763,5.484226,-5.348891,5.136176,...,8.228044,7.42636,6.939614,6.901233,7.774852,8.858764,7.779809,6.899137,7.485622,6.707736
2,-92.127205,-70.515411,-113.712387,-2.033062,-20.325293,2.247173,-3.398164,36.000706,-4.431626,-12.016228,...,7.511906,8.207579,17.957458,10.438013,16.305763,8.547948,23.268047,9.335292,13.642042,7.779731
3,-364.519714,-14.773803,24.598415,-6.85662,2.444881,-3.155872,-1.182,-0.975709,-2.964547,1.585627,...,7.771996,7.953443,8.913389,8.104065,8.057495,7.689389,8.754564,7.928741,7.537112,7.917179


In [None]:


df = pd.DataFrame(mfcc_avg_list) # make the list a data frame
column_names = [f'hs_mfcc_{i}_avg' for i in range(0, df.shape[1])] # column names
df.columns = column_names 
df_clean = df.dropna() # drop NANs
df_clean

df2 = pd.DataFrame(mfcc_var_list) # make the list a data frame
column_names = [f'hs_mfcc_{i}_var' for i in range(0, df.shape[1])] # column names
df2.columns = column_names 
df_clean2 = df2.dropna() # drop NANs
df_clean2

In [None]:


#remove ~800hz and below
def remove_low(f):
    song, sr = librosa.load(f)
    D = np.abs(librosa.stft(song))
     D=D[64:]
    DB = librosa.amplitude_to_db(D, ref=np.max)
    return DB

def main_freq_and_range(f, sound_range = .5):
    variance = np.var(remove_low(f), axis = 1)
    maxfreq = np.argmax(variance)
    maxvar = max(variance)
    if variance[maxfreq] != maxvar:
        print ('failed! at file ', f)
        return None

    #find range of frequencies around the max freq where the animal is loud 
    min_freq_range = 0
    max_freq_range = 0
    i=0
    bool_high =True
    bool_low = True
    while bool_high or bool_low:
        if bool_high:
            if maxfreq+i>len(variance)-1:
                max_freq_range = maxfreq+i
                bool_high = False
            elif variance[maxfreq+i] < sound_range*maxvar:
                #print ('max freq range =', (maxfreq+i+64)*librosa.fft_frequencies()[1])
                max_freq_range = maxfreq+i
                bool_high = False
        if bool_low:
            if maxfreq-i<0:
                min_freq_range = 0
                bool_low = False
            elif variance[maxfreq-i] < sound_range*maxvar:
                #print ('min freq range =', (maxfreq-i+64)*librosa.fft_frequencies()[1])
                min_freq_range = maxfreq-i
                bool_low = False
        i = i+1
    size = (max_freq_range+64)/(min_freq_range+64)
    return size


array([-1.32337433e+02,  5.71513672e+01,  3.34306812e+00,  3.18910275e+01,
       -1.45230131e+01,  1.70713425e+01, -1.39773245e+01,  1.86584911e+01,
       -1.90307255e+01,  2.42368698e+01, -1.11564493e+01,  1.10608101e+00,
        4.50567675e+00,  1.33245420e+00, -3.18900704e+00, -7.71016538e-01,
        7.90912676e+00, -7.98941088e+00,  5.79168081e+00,  5.78920555e+00,
       -7.73348713e+00,  6.53339577e+00, -8.51848507e+00,  4.25671196e+00,
       -1.14088082e+00,  5.29183054e+00, -5.14374304e+00,  5.00867128e-01,
        3.59931111e+00, -1.08544350e+00, -9.49866712e-01,  1.70051086e+00,
        8.53020668e-01,  3.94603968e+00, -4.54895782e+00,  2.40375805e+00,
       -5.07259035e+00,  2.71985888e+00, -5.70419741e+00,  4.27173424e+00,
        2.66561371e+02,  1.49299606e+02,  1.92344475e+01,  1.39988739e+02,
        2.06970081e+01,  9.37425613e+01,  1.53819656e+02,  3.32619820e+01,
        5.29541473e+01,  8.27161560e+01,  1.99780941e+01,  2.95269299e+01,
        4.90922890e+01,  