In [1]:
import numpy as np
import pandas as pd
import math
import time
import re
import os
from skimage import util
from scipy.io import wavfile
from scipy import signal
from scipy import stats
import xarray as xr

#visualizing results
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

np.set_printoptions(suppress=True)

Create data frame of annotation info

In [3]:
annot_path = "C:/Users/Schindler/Documents/Schindler_Lab/Data/Analysis/Excel files/USV/annot_info_df.csv"
annot_data = pd.read_csv(annot_path)
annot_info = pd.DataFrame(data = annot_data)
annot_info = annot_info[annot_info['Annotation'] != 'radar']
print(annot_info.shape)
annot_info.head()

(184, 8)


Unnamed: 0.1,Unnamed: 0,Animal,Group,Session,Begin Time (s),Annotation,Begin Time (s)_1000,time_stamp
0,0,533,5,CPApair,376.574455,low slug,376574.4545,376560.0
1,1,533,5,CPApair,46.306579,low slug,46306.57941,46305.0
2,2,533,5,CPApair,243.272865,low slug,243272.8651,243270.0
3,3,533,5,CPApair,149.708324,low slug,149708.324,149692.5
4,4,533,5,CPApair,243.176192,low slug,243176.1917,243157.5


Find path names for each netcdf file corresponding to wav file that has annotated data

In [4]:
netcdf_path = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets'

In [5]:
path_names = []
files = os.listdir(netcdf_path)
for file in files: 
        path_names.append(netcdf_path + "/" + file)

path_names

['C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/533_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/534_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/535_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/542_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/543_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/554_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/555_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/559_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/600_xr_Dataset.nc']

In [6]:
def create_annot_dataset(path, annot_info, size):
    
    #get animal name from path
    name = re.search("\d\d\d", path).group(0)
    print(name)
    
    #open xarray Dataset
    data = xr.open_dataset(path)
    print(data['slices'].shape)
    
    #get time_stamps of animal's annotations, select slices and save
    yes = annot_info[annot_info['Animal'] ==  int(name)]['time_stamp'].sort_values().values
    data_yes = data.sel(slices = yes)
    
    #get slice indexs (e.g. time_stamps) that are not annotations, select slices and save
    slice_indexes = data['slices'].values
    no = np.setdiff1d(slice_indexes,yes)
    print(yes.shape[0] + no.shape[0])
    no_short = np.random.choice(no, size, replace=False)
    data_no = data.sel(slices = no_short)
    
    return data_yes, data_no

In [None]:
data_yes, data_no = create_annot_dataset(path_names[2], annot_info, 100)

In [None]:
data_yes

In [None]:
slic = data_yes['__xarray_dataarray_variable__'].sel(slices=90.0)
slic

In [None]:
freq_array = slic['freq'].values
mag_array = slic.max(dim = 'times').values
mag_probs = mag_array/sum(mag_array)
freq_mag = freq_array*mag_probs
spec_cent = sum(freq_mag)
spec_spread = np.var(freq_mag)
spec_skew = stats.skew(freq_mag)
spec_kurtosis = stats.kurtosis(freq_mag)
spec_slope, intercept, r_value, p_value, std_err = stats.linregress(freq_array, mag_array)
spec_roll_off = .95*sum(freq_mag)

In [7]:
def compute_spectral_purity(Dataset):    
    spec_purs = []
    
    for value in Dataset['slices'].values:
        spec_pur = stats.gmean(Dataset.sel(slices = value)['__xarray_dataarray_variable__'].values, axis = None) / Dataset.sel(slices = value)['__xarray_dataarray_variable__'].values.mean()
        
        spec_purs.append(spec_pur)
        
    return spec_purs

In [8]:
def compute_spectral_features(Dataset):
    spec_centroid = []
    spec_spread = []
    spec_skewness = []
    spec_kurtosis = []
    spec_slope = []
    spec_roll_off = []
    
    freq_array = Dataset['freq'].values
    
    for value in Dataset['slices'].values:  
        mag_array = Dataset['__xarray_dataarray_variable__'].sel(slices=value).max(dim = 'times').values
        mag_probs = mag_array/sum(mag_array)
        freq_mag = freq_array*mag_probs
        
        spec_cent = sum(freq_mag)
        spec_spr = np.var(freq_mag)
        spec_skew = stats.skew(freq_mag)
        spec_kurt = stats.kurtosis(freq_mag)
        slope, intercept, r_value, p_value, std_err = stats.linregress(freq_array, freq_mag)
        spec_ro = .95*sum(freq_mag)
            
        spec_centroid.append(spec_cent)
        spec_spread.append(spec_spr)
        spec_skewness.append(spec_skew)
        spec_kurtosis.append(spec_kurt)
        spec_slope.append(slope)
        spec_roll_off.append(spec_ro)
        
        
        
    return spec_centroid, spec_spread, spec_skewness, spec_kurtosis, spec_slope, spec_roll_off

In [9]:
def spec_cent_slice(data_slice, freq_array, dim): 
    
    mag_array = data_slice.max(dim = dim).values

    freq_mag_sum = sum(freq_array*mag_array)
    mag_sum = sum(mag_array)

    spec_cent = freq_mag_sum/mag_sum
            
    return spec_cent

In [10]:
annot_features_yes = pd.DataFrame()
annot_features_no = pd.DataFrame()

for path in path_names:
    
    #get animal name from path
    name = re.search("\d\d\d", path).group(0)
    print(name)
    
    #create datasets of slices of known annotations and a random selection of noise
    data_yes, data_no = create_annot_dataset(path, annot_info, 100)
    
    #compute power sum
    yes_sums = data_yes.groupby('slices').sum(xr.ALL_DIMS)['__xarray_dataarray_variable__'].values
    no_sums = data_no.groupby('slices').sum(xr.ALL_DIMS)['__xarray_dataarray_variable__'].values
    
    #compute spectral purity
    yes_spec_purs = compute_spectral_purity(data_yes)
    no_spec_purs = compute_spectral_purity(data_no)
    
    #compute spectral features
    yes_spec_centroid, yes_spec_spread, yes_spec_skewness, yes_spec_kurtosis, yes_spec_slope, yes_spec_roll_off = compute_spectral_features(data_yes)
    no_spec_centroid, no_spec_spread, no_spec_skewness, no_spec_kurtosis, no_spec_slope, no_spec_roll_off = compute_spectral_features(data_no)

    #add computed features to exisiting dataframe of known annotations
    annot_yes = annot_info[annot_info['Animal'] == int(name)].sort_values(by=['time_stamp'])
    annot_yes['power_sum'] = yes_sums
    annot_yes['spec_pur'] = yes_spec_purs
    annot_yes['spec_cent'] = yes_spec_centroid
    annot_yes['spec_spread'] = yes_spec_spread
    annot_yes['spec_skew'] = yes_spec_skewness
    annot_yes['spec_kurt'] = yes_spec_kurtosis
    annot_yes['spec_slope'] = yes_spec_slope
    annot_yes['spec_roll'] = yes_spec_roll_off
    
    #create and fill dataframe for randomly selected noise slices
    annot_no = pd.DataFrame(columns = ['Animal', 'Group', 'Annotation', 'time_stamp', 'power_sum', 'spec_pur'], index = np.arange(0,100))
    annot_no['Animal'] = name
    annot_no['Group'] = annot_info[annot_info['Animal'] == int(name)]['Group'].iloc[0]
    annot_no['Annotation'] = 'rand_noise'
    annot_no['time_stamp'] = data_no['slices'].values
    annot_no['power_sum'] = no_sums
    annot_no['spec_pur'] = no_spec_purs
    annot_no['spec_cent'] = no_spec_centroid
    annot_no['spec_spread'] = no_spec_spread
    annot_no['spec_skew'] = no_spec_skewness
    annot_no['spec_kurt'] = no_spec_kurtosis
    annot_no['spec_slope'] = no_spec_slope
    annot_no['spec_roll'] = no_spec_roll_off
    
    annot_features_yes = annot_features_yes.append(annot_yes, ignore_index=True)
    annot_features_no = annot_features_no.append(annot_no, ignore_index=True)

#create and save combined dataframe of yes and no
annot_features_yes.drop(['Unnamed: 0', 'Session', 'Begin Time (s)', 'Begin Time (s)_1000'], axis=1, inplace=True)
annot_features_full = pd.concat([annot_features_yes, annot_features_no])
print(annot_features_full.shape)

annot_features_full.to_csv('annot_features_full.csv')

533
533
(26666,)
26666
534
534
(26666,)
26666
535
535
(26666,)
26666
542
542
(26666,)
26666
543
543
(26666,)
26666
554
554
(26666,)
26666
555
555
(26666,)
26666
559
559
(26666,)
26666
600
600
(13511,)
13511
(1084, 12)


In [11]:
annot_features_full

Unnamed: 0,Animal,Group,Annotation,time_stamp,power_sum,spec_pur,spec_cent,spec_spread,spec_skew,spec_kurt,spec_slope,spec_roll
0,533,5,low slug,46305.0,3.271490e+04,0.368722,39977.724643,9966.541464,2.313672,9.578221,2.707231e-04,37978.838411
1,533,5,low slug,149692.5,4.303746e+04,0.306856,37931.297975,20543.321215,4.308415,22.308306,-2.391182e-04,36034.733076
2,533,5,low slug,243157.5,2.701935e+05,0.062226,11483.575333,37201.715521,6.940257,50.135018,-1.536689e-03,10909.396566
3,533,5,low slug,243270.0,6.301445e+04,0.232295,32242.081971,21947.090189,5.325133,33.372614,-6.709287e-04,30629.977872
4,533,5,low multi,295560.0,7.493316e+04,0.206291,17095.059221,11740.918797,5.372725,34.224633,-1.092713e-03,16240.306260
5,533,5,low slug,337747.5,2.395255e+05,0.082071,17438.339345,18616.943387,2.647386,6.407941,-1.891211e-03,16566.422377
6,533,5,low slug,353745.0,5.426101e+04,0.271408,30840.046263,6764.789949,2.801140,11.508558,-4.862393e-04,29298.043950
7,533,5,low slug,376560.0,9.874895e+04,0.155727,17716.790632,19571.194689,6.738355,49.346395,-1.001570e-03,16830.951100
8,533,5,low multi,378495.0,2.387272e+05,0.100473,18903.457780,24155.714469,4.637713,25.626232,-1.791002e-03,17958.284891
9,533,5,low slug,378607.5,1.593617e+05,0.125804,20632.266225,35787.103498,5.111438,31.020057,-1.954505e-03,19600.652914
