# Extracting Features for PLS, KNN, and SVC analyses

## Import Packages

In [1]:
import numpy as np
import pandas as pd
import librosa
import os
import glob

## Define functions for first set of features

In [2]:
#function to extract average and variance of 40 mfccs across entire audio clip
def extract_mfccs(song, sr, cutoff= 800, n_mfcc = 40):
    mfccs = librosa.feature.mfcc(y = song, fmin = cutoff, n_mfcc = n_mfcc)
    mfcc_avg = np.mean(mfccs, axis = 1)
    mfcc_var = np.var(mfccs, axis = 1)
    return mfcc_avg, mfcc_var

#function to extract average and variance of 40 mfccs across loudest half second
def extract_hs_mfccs(song, sr, cutoff = 800, n_mfcc = 40):
    max_ampj = np.argmax(abs(song))
    time_start, time_stop = round(max_ampj - sr/4), round(max_ampj + sr/4)
    if time_start < 0:
        time_start, time_stop = 0, round(sr/2)
    if time_stop > len(song):
        time_start, time_stop = len(song) - round(sr/2), len(song)
    mfccs = librosa.feature.mfcc(y = song[time_start:time_stop], fmin = cutoff, n_mfcc = n_mfcc)
    mfccs_hs = librosa.feature.mfcc(y = song[time_start:time_stop], fmin = cutoff, n_mfcc = n_mfcc)
    mfcc_hs_avg = np.mean(mfccs, axis = 1)
    mfcc_hs_var = np.var(mfccs, axis = 1)
    if time_start < 0: 
        return (np.full(20, np.nan), np.full(20, np.nan))
    else: 
        return mfcc_hs_avg, mfcc_hs_var 

# return frequency from file that had the greatest magnitude
def extract_peak_frequency(song, sr, cutoff = 800):
    fft_data = np.fft.fft(song)
    freqs = np.fft.fftfreq(len(song))*sr
    freqs_cut = freqs[freqs > cutoff]
    fft_data_cut = fft_data[freqs > cutoff]
    peak_coefficient = np.argmax(np.abs(fft_data_cut))
    peak_freq = freqs_cut[peak_coefficient]
    return peak_freq

# return frequencies from file that had the mean amplitude and variance
def main_freq(song, cutoff = 800):
    D = np.abs(librosa.stft(song))
    cut = int(float(cutoff)/(librosa.fft_frequencies()[1]))
    D = D[cut:]
    DB = librosa.amplitude_to_db(D, ref=np.max)
    maxmean = np.mean(DB[0,:])
    main_meanfreq = 1
    maxvar = np.var(DB[0,:])
    main_varfreq = 0
    for x in range(1,DB.shape[0]):
        mean = np.mean(DB[x,:])
        var = np.var(DB[x,:])
        if mean > maxmean:
            maxmean = mean
            main_meanfreq = x
        if var > maxvar:
            maxvar = var
            main_varfreq = x    
    return main_varfreq * librosa.fft_frequencies()[1], main_meanfreq * librosa.fft_frequencies()[1] #main_freq and max_mean

def main_freq_and_range(f, sound_range = .5):
    variance = compute_var(remove_low(f))
    maxfreq = np.argmax(variance)
    maxvar = max(variance)
    if variance[maxfreq] != maxvar:
        print ('failed! at file ', f)
        return None

    #find range of frequencies around the max freq where the animal is loud 
    min_freq_range = 0
    max_freq_range = 0
    i=0
    bool_high =True
    bool_low = True
    while bool_high or bool_low:
        if bool_high:
            if maxfreq+i>len(variance)-1:
                max_freq_range = maxfreq+i
                bool_high = False
            elif variance[maxfreq+i] < sound_range*maxvar:
                #print ('max freq range =', (maxfreq+i+64)*librosa.fft_frequencies()[1])
                max_freq_range = maxfreq+i
                bool_high = False
        if bool_low:
            if maxfreq-i<0:
                min_freq_range = 0
                bool_low = False
            elif variance[maxfreq-i] < sound_range*maxvar:
                #print ('min freq range =', (maxfreq-i+64)*librosa.fft_frequencies()[1])
                min_freq_range = maxfreq-i
                bool_low = False
        i = i+1
    size = (max_freq_range+64)/(min_freq_range+64)
    return (maxfreq+64)*librosa.fft_frequencies()[1], size

def remove_low(f):
    song, sr = librosa.load(f)
    D = np.abs(librosa.stft(song))
    #FIND OUT HOW MANY FREQUENCIES WE REMOVED HERE! About 10.7*64, 10.7 is the value of librosa.fft_frequencies()[1] 
    D=D[64:]
    DB = librosa.amplitude_to_db(D, ref=np.max)
    return DB

def compute_var(DB):
    variance = np.zeros(DB.shape[0])
    for x in range(0,DB.shape[0]):
        variance[x] = np.var(DB[x,:])
    return variance

## Pare down dataset to those files in the folder with voice-less audio

In [3]:
df = pd.read_csv('FauxRecordings_Data.csv')
files_in_df = df.cat_num+'_cut.wav'
path = r'FauxRecordings_NoVoice/*.wav'
files_in_novoice = glob.glob(path)
files_in_novoice = [os.path.basename(file) for file in files_in_novoice]

df2 = df[files_in_df.isin(files_in_novoice)]
df2


Unnamed: 0,cat_num,fam_or_subfam,critter_name
0,FauxRecording0,Grillinae,Cricket
1,FauxRecording1,Tettigoniinae,Katyidid
2,FauxRecording2,Cicadidae,Cicada
4,FauxRecording4,Tettigoniinae,Katydid


## Add the mfcc features to the dataframe. Then add four remaining features: max_mean (second output of main_freq function above), 'main_freq' and 'range' (outputs of main_freq_and_range), and 'peak_freq' (output of extract_peak_frequency)

In [6]:
path = 'FauxRecordings_NoVoice//'
for i,file in enumerate(df2['cat_num']+'_cut.wav'):
    song, sr = librosa.load(path+file)
    mfccs = extract_mfccs(song, sr)
    mfccs_hs = extract_hs_mfccs(song, sr)
    temp_cat_num = file.split('_')[0]
    for n in range(0,40):
        df2.at[df2.loc[df2['cat_num']== temp_cat_num].index[0], 'mfcc_'+str(n)+'_avg'] = mfccs[0][n]
        df2.at[df2.loc[df2['cat_num']== temp_cat_num].index[0], 'mfcc_'+str(n)+'_var'] = mfccs[1][n]
        df2.at[df2.loc[df2['cat_num']== temp_cat_num].index[0], 'hs_mfcc_'+str(n)+'_avg'] = mfccs_hs[0][n]
        df2.at[df2.loc[df2['cat_num']== temp_cat_num].index[0], 'hs_mfcc_'+str(n)+'_var'] = mfccs_hs[1][n]

In [7]:
path = 'FauxRecordings_NoVoice//'
for i,file in enumerate(df2['cat_num']+'_cut.wav'):
    song, sr = librosa.load(path+file)
    A, B = main_freq(song)
    temp_cat_num = file.split('_')[0]
    df2.at[df2.loc[df2['cat_num']== temp_cat_num].index[0], 'max_mean'] = B
    freq, rng = main_freq_and_range(path+file)
    df2.at[df2.loc[df2['cat_num']== temp_cat_num].index[0], 'range'] = rng
    df2.at[df2.loc[df2['cat_num']== temp_cat_num].index[0], 'main_freq'] = freq
    df2.at[df2.loc[df2['cat_num']== temp_cat_num].index[0], 'peak_freq'] = extract_peak_frequency(song=song, sr=sr)    

In [8]:
df2.head(4)

Unnamed: 0,cat_num,fam_or_subfam,critter_name,mfcc_0_avg,mfcc_0_var,hs_mfcc_0_avg,hs_mfcc_0_var,mfcc_1_avg,mfcc_1_var,hs_mfcc_1_avg,...,hs_mfcc_38_avg,hs_mfcc_38_var,mfcc_39_avg,mfcc_39_var,hs_mfcc_39_avg,hs_mfcc_39_var,max_mean,range,main_freq,peak_freq
0,FauxRecording0,Grillinae,Cricket,-129.43515,359.437927,-132.337433,266.561371,62.246742,191.677841,57.151367,...,-5.704198,8.843474,4.206622,10.580957,4.271734,7.988527,10.766602,1.100503,4597.338867,4598.25
1,FauxRecording1,Tettigoniinae,Katyidid,-404.522583,2307.756592,-378.134949,1126.455322,-8.70485,1555.934082,-44.272579,...,-1.197838,5.503016,2.107327,6.707736,2.866376,8.194205,7956.518555,1.388658,9345.410156,9412.010993
2,FauxRecording2,Cicadidae,Cicada,-92.127205,2297.14624,-68.681908,865.231812,-70.515411,169.182709,-60.675735,...,1.270771,3.110382,1.094913,7.77973,-0.555236,5.919888,2465.551758,2.136612,2917.749023,3252.95
4,FauxRecording4,Tettigoniinae,Katydid,-364.519714,2289.035889,-307.482147,57.821579,-14.773803,2476.813232,-78.730766,...,-2.059802,9.158952,1.440268,7.917178,1.340278,6.964387,9485.375977,1.170732,10712.768555,10360.35


In [9]:
#Save the features and classifying labels to a csv file
df2.to_csv('FauxRecording_Data_w_Features.csv', index=False)