In [1]:
import numpy as np
import pandas as pd
import math
import time
import re
import os
from scipy.io import wavfile
from skimage import util
from scipy import signal
from scipy import stats

#from sklearn.preprocessing import StandardScaler
#from sklearn.model_selection import KFold, train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV

#from sklearn.cluster import KMeans
#from sklearn.metrics.cluster import silhouette_score

#visualizing results
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#import yellowbrick as yb

In [2]:
def create_slice_from_wav(file_path, slice_len, step_size, max_samples=150000000):
    """Read a wav file and cut it into fixed-length, possibly overlapping slices.

    Parameters
    ----------
    file_path : str or path-like
        Location of the wav file; passed straight to ``scipy.io.wavfile.read``.
    slice_len : int
        Length of every slice, in samples (multiply a duration in seconds by
        the sampling frequency to convert).
    step_size : float
        Hop between the starts of consecutive slices, expressed as a fraction
        of ``slice_len``. The hop in samples is ``int(slice_len * step_size)``,
        so 1 gives back-to-back slices and smaller values give overlapping
        slices (it is NOT the amount of overlap).
    max_samples : int or None, optional
        Only the first ``max_samples`` samples of the recording are kept.
        ``None`` keeps the whole recording. The default reproduces the limit
        that used to be hard-coded in this function.

    Returns
    -------
    samp_freq : int
        Sampling frequency reported by the wav file.
    sig_data : numpy.ndarray
        The (possibly truncated) signal.
    slices : numpy.ndarray
        Windowed view on ``sig_data``, one slice per row.
    steps : int
        Hop between slices, in samples.

    Raises
    ------
    ValueError
        If ``slice_len * step_size`` rounds down to less than one sample.
    """

    #validate the hop before loading a potentially very large file
    M = slice_len
    steps = int(M*step_size)
    if steps < 1:
        raise ValueError(
            'slice_len * step_size must be at least 1 sample, got ' + str(steps)
        )

    #read in wav file
    samp_freq, sig_data = wavfile.read(file_path)
    #slicing with None keeps everything, so max_samples=None disables the cap
    sig_data = sig_data[:max_samples]
    print('Sampling frequency: ' + str(samp_freq))

    #determine number of samples and length
    n_samples = sig_data.shape[0]
    print('Number of samples: ' + str(n_samples))
    sig_len = n_samples/samp_freq
    print('Length: ' + str(sig_len) + ' sec')

    #create slices
    # NOTE(review): window_shape is one-dimensional, so this presumably expects
    # a mono recording (1-D sig_data) -- confirm for multi-channel files.
    slices = util.view_as_windows(sig_data, window_shape=(M,), step=steps)
    print(f'Audio shape: {sig_data.shape}, Sliced audio shape: {slices.shape}')

    return samp_freq, sig_data, slices, steps

In [3]:
def plot_spec(Sx, times, steps, time_stamp, freqs=None):
    """Plot the spectrogram of one slice in decibels.

    The x axis is ``times * 1000 + time_stamp`` (labelled msec), the y axis is
    ``freqs / 1000`` (labelled kHz) and the colour is ``10 * log10(Sx)``.

    Parameters
    ----------
    Sx : array-like
        Spectral power to display.
    times : numpy.ndarray
        Time bins of ``Sx``; presumably in seconds given the x1000 scaling and
        the msec label -- confirm.
    steps : any
        Unused; kept so existing calls keep working.
    time_stamp : number
        Offset added to the scaled time axis.
    freqs : numpy.ndarray, optional
        Frequency bins of ``Sx``. When omitted, the notebook-level variable
        ``freqs_spec`` is used, which is what this function always did before.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module, as before.
    """

    if freqs is None:
        #fall back to the notebook-level frequency axis for existing callers
        freqs = freqs_spec

    f, ax = plt.subplots()
    ax.pcolormesh((times*1000) + (time_stamp), freqs / 1000, 10 * np.log10(Sx), cmap = 'cubehelix')
    ax.ticklabel_format(useOffset=False)
    ax.set_ylabel('Frequency [kHz]')
    ax.set_xlabel('Time [msec]')
    plt.show()

    return plt

In [4]:
def multi_plot(image_df, time_stamp_list, x=None, y=None):
    """Plot one spectrogram per time stamp in ``time_stamp_list[x:y]``.

    Parameters
    ----------
    image_df : mapping or DataFrame
        Indexed with each time stamp (``image_df[time_stamp]``) to obtain the
        spectral power to draw.
    time_stamp_list : sequence
        Time stamps to plot; each one is also added as an offset on the x axis.
    x, y : int or None, optional
        Start and stop positions of the part of ``time_stamp_list`` to plot.
        Both default to ``None``, which plots the whole list, so the function
        can be called with just the first two arguments.

    Notes
    -----
    Relies on the notebook-level variables ``times`` and ``freqs_spec`` for the
    axes; they must be defined before this function is called.
    """
    for time_stamp in time_stamp_list[x:y]:
        plt.figure(figsize = (2,5))
        plt.pcolormesh((times*1000) + (time_stamp), freqs_spec / 1000, 10 * np.log10(image_df[time_stamp]), cmap = 'cubehelix')
        plt.show()

In [5]:
def find_features(data):
    """Compute spectral flatness and power sum for every row (time stamp) of a df.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per time stamp (the index). Must contain an ``'Animal'``
        identifier column; every other column is treated as a numeric value of
        that time stamp's spectrum.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``data`` with the columns ``'animal'``, ``'time_stamp'``
        (a copy of the index), ``'spec_flat'`` (geometric mean divided by
        arithmetic mean of the row) and ``'power_sum'`` (sum of the row).

    Raises
    ------
    KeyError
        If ``data`` has no ``'Animal'`` column.

    Notes
    -----
    The elapsed time in seconds is printed, as before. The ``'Animal'`` column
    is excluded from the statistics: it is an identifier, and including it
    either contaminated the features (numeric id) or made the computation fail
    (string id). Values are assigned column-wise instead of through
    ``feature_df.loc[row]['col'] = ...``, which is chained indexing and is not
    written back to the frame under pandas Copy-on-Write.
    """

    start = time.time()

    animal = data['Animal']
    values = data.drop(columns='Animal')

    #row-wise statistics; pandas mean/sum skip NaN exactly like the Series methods did
    geometric_mean = stats.gmean(values.to_numpy(dtype=float), axis=1)
    arithmetic_mean = values.mean(axis=1).to_numpy()
    power_sum = values.sum(axis=1).to_numpy()

    column_order = ['animal', 'time_stamp', 'spec_flat', 'power_sum']
    feature_df = pd.DataFrame(
        {
            'animal': animal.to_numpy(),
            'time_stamp': data.index.to_numpy(),
            #spectral flatness
            'spec_flat': geometric_mean / arithmetic_mean,
            #power sum
            'power_sum': power_sum,
        },
        index = data.index,
        columns = column_order,
    )

    end = time.time()
    print(end - start)

    return feature_df

Create df of annotated USVs from RavenLite

In [6]:
# Location of the annotation CSV. Set the USV_ANNOT_PATH environment variable to
# run the notebook on another machine; the default is the original lab path.
annot_path = os.environ.get(
    "USV_ANNOT_PATH",
    "C:/Users/Schindler/Documents/Schindler_Lab/Data/Analysis/Excel files/USV/USV_annot.csv",
)

In [None]:
# Compute the per-time-stamp features (spectral flatness, power sum) with
# find_features, print the shape of the result, then display the frame.
# NOTE(review): `annot_535_low_multi_slices` is not defined in any cell visible
# above this one -- confirm it exists at this point on a fresh kernel run.
annot_features_df_535_low_multi_slices = find_features(annot_535_low_multi_slices)
print(annot_features_df_535_low_multi_slices.shape)
annot_features_df_535_low_multi_slices

In [None]:
# Print max, min and mean of spec_flat, then of power_sum, one value per line.
# A generator is used so no loop variables are left behind in the notebook namespace.
print(
    *(
        getattr(annot_features_df_535_low_multi_slices[feature], statistic)()
        for feature in ('spec_flat', 'power_sum')
        for statistic in ('max', 'min', 'mean')
    ),
    sep='\n',
)

In [None]:
# Index labels of annot_slice_df as an array; passed to multi_plot below as the
# time stamps to plot.
# NOTE(review): `annot_slice_df` is not defined in any visible cell -- confirm
# it is created before this cell on a fresh kernel run.
time_stamp_list = annot_slice_df.index.values

In [None]:
# multi_plot(image_df, time_stamp_list, x, y) plots time_stamp_list[x:y]; the
# bounds were missing here, so pass the full range explicitly.
multi_plot(spec_slices, time_stamp_list, 0, len(time_stamp_list))

In [None]:
# Histogram of the power_sum feature, labelled so the figure stands on its own.
plt.hist(annot_features_df_535_low_multi_slices['power_sum'].values)
plt.xlabel('power_sum')
plt.ylabel('Count')
plt.title('Distribution of power_sum')
plt.show()

In [None]:
# Keep the rows whose annotation text contains the literal 'low multi'.
# na=False treats missing annotations as non-matching; without it str.contains
# returns NaN for them and the boolean mask below raises.
annot_535_low_multi = annot_535[
    annot_535['Annotation'].str.contains('low multi', regex=False, na=False)
]
print(annot_535_low_multi.shape)
annot_535_low_multi.head()