In [1]:
import numpy as np
import pandas as pd
import math
import time
import re
import os
from skimage import util
from scipy.io import wavfile
from scipy import signal
from scipy import stats
import xarray as xr

#visualizing results
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Widen pandas' display limits so large tables are shown without truncation.
for option_name, option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

Create a data frame of annotation info (rows annotated as 'radar' are excluded)

In [3]:
# Load the table of manual annotations (one row per annotated call).
annot_path = "C:/Users/Schindler/Documents/Schindler_Lab/Data/Analysis/Excel files/USV/annot_info_df.csv"
annot_data = pd.read_csv(annot_path)  # read_csv already returns a DataFrame

# Drop rows annotated as 'radar'. The explicit copy makes annot_info an
# independent frame, so columns can be added to it (or to selections of it)
# later without chained-assignment warnings.
annot_info = annot_data[annot_data['Annotation'] != 'radar'].copy()
print(annot_info.shape)
annot_info.head()

(184, 8)


Unnamed: 0.1,Unnamed: 0,Animal,Group,Session,Begin Time (s),Annotation,Begin Time (s)_1000,time_stamp
0,0,533,5,CPApair,376.574455,low slug,376574.4545,376560.0
1,1,533,5,CPApair,46.306579,low slug,46306.57941,46305.0
2,2,533,5,CPApair,243.272865,low slug,243272.8651,243270.0
3,3,533,5,CPApair,149.708324,low slug,149708.324,149692.5
4,4,533,5,CPApair,243.176192,low slug,243176.1917,243157.5


Find the path of each netCDF file that corresponds to a wav file with annotated data

In [4]:
# Folder containing the per-animal netCDF files (named like "533_xr_Dataset.nc").
netcdf_path = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets'

In [5]:
# Collect the full path of every netCDF file in the datasets folder.
# os.listdir() returns entries in arbitrary order and may include unrelated
# files, so sort the listing (deterministic processing order) and keep only
# ".nc" files (anything else would break the animal-id lookup further down).
files = sorted(os.listdir(netcdf_path))
path_names = [
    netcdf_path + "/" + file
    for file in files
    if file.lower().endswith(".nc")
]

path_names

['C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/533_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/534_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/535_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/542_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/543_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/554_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/555_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/559_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/600_xr_Dataset.nc']

In [12]:
def create_annot_dataset(path, annot_info, size, seed=None):
    """Split one animal's Dataset into annotated slices and random other slices.

    Parameters
    ----------
    path : str
        Path to the animal's netCDF file. The animal id is the first run of
        three digits found in the file name.
    annot_info : pandas.DataFrame
        Annotation table; the columns ``Animal`` and ``time_stamp`` are read.
    size : int
        Number of non-annotated slices to draw (without replacement).
    seed : int, optional
        Seed for the random draw. ``None`` (default) uses numpy's global
        random state, so results are only reproducible if that was seeded.

    Returns
    -------
    tuple
        ``(data_yes, data_no)``: the slices whose label is one of the animal's
        annotation ``time_stamp`` values, and ``size`` randomly chosen slices
        from the remaining labels. Both are loaded into memory and ordered by
        ascending slice label.

    Raises
    ------
    ValueError
        If the file name contains no three-digit animal id, or (raised by
        numpy) if ``size`` exceeds the number of non-annotated slices.
    """
    #get animal name from the file name (not the full path, so digits in a
    #directory name cannot be mistaken for the animal id)
    match = re.search(r"\d\d\d", os.path.basename(path))
    if match is None:
        raise ValueError("no three-digit animal id found in file name: " + str(path))
    name = match.group(0)
    print(name)

    #use numpy's global random state unless a seed was requested
    rng = np.random if seed is None else np.random.RandomState(seed)

    #open xarray Dataset; the context manager closes the file handle again
    with xr.open_dataset(path) as data:
        print(data['slices'].shape)

        #get time_stamps of animal's annotations, select slices and save
        yes = annot_info.loc[annot_info['Animal'] == int(name), 'time_stamp'].sort_values().values
        #load() pulls the selection into memory so it stays usable after the file is closed
        data_yes = data.sel(slices=yes).load()

        #get slice indexes (e.g. time_stamps) that are not annotations, select slices and save
        slice_indexes = data['slices'].values
        no = np.setdiff1d(slice_indexes, yes)
        #sanity check: annotated + non-annotated labels should add up to all slices
        print(yes.shape[0] + no.shape[0])

        #sort the random draw so data_no is in ascending label order; label-sorted
        #reductions such as groupby('slices') then line up with data_no['slices'].values
        no_short = np.sort(rng.choice(no, size, replace=False))
        data_no = data.sel(slices=no_short).load()

    return data_yes, data_no

In [7]:
def compute_spectral_purity(Dataset):
    """Compute the geometric-mean / arithmetic-mean ratio of every slice.

    Parameters
    ----------
    Dataset : xarray.Dataset
        Must provide a ``slices`` coordinate and the data variable
        ``__xarray_dataarray_variable__``.

    Returns
    -------
    list
        One ratio per entry of ``Dataset['slices'].values``, in that order.
        Each ratio is the geometric mean of all values in the slice divided by
        their arithmetic mean.
    """
    spec_purs = []

    for value in Dataset['slices'].values:
        # Select the slice once and reuse the array for both means; on lazily
        # loaded data every .sel(...).values may trigger another read.
        slice_values = Dataset.sel(slices = value)['__xarray_dataarray_variable__'].values

        # axis=None: treat the whole slice as one flat collection of values
        spec_pur = stats.gmean(slice_values, axis = None) / slice_values.mean()

        spec_purs.append(spec_pur)

    return spec_purs

In [10]:
def compute_spectral_centroids(Dataset):
    """Compute the magnitude-weighted mean frequency of every slice.

    For each slice the spectrogram is first collapsed to one magnitude per
    frequency by taking the maximum over the ``times`` dimension; the centroid
    is then ``sum(freq * magnitude) / sum(magnitude)``.

    Parameters
    ----------
    Dataset : xarray.Dataset
        Must provide the coordinates ``slices`` and ``freq`` and the data
        variable ``__xarray_dataarray_variable__`` with a ``times`` dimension.

    Returns
    -------
    list
        One centroid per entry of ``Dataset['slices'].values``, in that order.
    """
    spec_centroids = []
    freq_array = Dataset['freq'].values
    # The variable lookup does not depend on the slice, so do it once.
    spectrograms = Dataset['__xarray_dataarray_variable__']

    for value in Dataset['slices'].values:
        mag_array = spectrograms.sel(slices=value).max(dim = 'times').values

        # np.sum reduces in compiled code; the built-in sum() would iterate
        # over the array element by element in Python.
        freq_mag_sum = np.sum(freq_array * mag_array)
        mag_sum = np.sum(mag_array)

        spec_cent = freq_mag_sum / mag_sum

        spec_centroids.append(spec_cent)

    return spec_centroids

In [13]:
# Number of random non-annotated ("noise") slices sampled per animal.
N_NOISE_SLICES = 100
# Name of the spectrogram variable stored in each netCDF Dataset.
SPEC_VAR = '__xarray_dataarray_variable__'

# Per-animal frames are collected in lists and concatenated once after the
# loop (DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call).
yes_frames = []
no_frames = []

for path in path_names:

    #get animal name from the file name
    name = re.search(r"\d\d\d", os.path.basename(path)).group(0)
    print(name)
    animal_id = int(name)

    #create datasets of slices of known annotations and a random selection of noise
    data_yes, data_no = create_annot_dataset(path, annot_info, N_NOISE_SLICES)

    #compute power sum: reduce over every dimension except 'slices' so the
    #result keeps the Dataset's own slice order (a groupby('slices') reduction
    #orders its result by sorted label, which does not match randomly ordered
    #noise slices)
    yes_spec = data_yes[SPEC_VAR]
    no_spec = data_no[SPEC_VAR]
    yes_sums = yes_spec.sum(dim=[d for d in yes_spec.dims if d != 'slices']).values
    no_sums = no_spec.sum(dim=[d for d in no_spec.dims if d != 'slices']).values

    #compute spectral purity
    yes_spec_purs = compute_spectral_purity(data_yes)
    no_spec_purs = compute_spectral_purity(data_no)

    #compute spectral centroid
    yes_spec_cents = compute_spectral_centroids(data_yes)
    no_spec_cents = compute_spectral_centroids(data_no)

    #add computed features to existing dataframe of known annotations
    #(rows sorted by time_stamp, the same order in which data_yes was selected)
    annot_animal = annot_info[annot_info['Animal'] == animal_id].sort_values(by=['time_stamp'])
    annot_yes = annot_animal.assign(
        power_sum=yes_sums,
        spec_pur=yes_spec_purs,
        spec_cent=yes_spec_cents,
    )

    #create and fill dataframe for randomly selected noise slices
    #(Animal is stored as an int, like in the annotated rows)
    annot_no = pd.DataFrame({
        'Animal': animal_id,
        'Group': annot_animal['Group'].iloc[0],
        'Annotation': 'rand_noise',
        'time_stamp': data_no['slices'].values,
        'power_sum': no_sums,
        'spec_pur': no_spec_purs,
        'spec_cent': no_spec_cents,
    })

    yes_frames.append(annot_yes)
    no_frames.append(annot_no)

annot_features_yes = pd.concat(yes_frames, ignore_index=True)
annot_features_no = pd.concat(no_frames, ignore_index=True)

#create and save combined dataframe of yes and no
annot_features_yes = annot_features_yes.drop(
    columns=['Unnamed: 0', 'Session', 'Begin Time (s)', 'Begin Time (s)_1000']
)
#index labels are kept as-is (annotated rows first, then noise rows, each
#numbered from 0), so the saved file keeps its previous layout
annot_features_full = pd.concat([annot_features_yes, annot_features_no])
print(annot_features_full.shape)

annot_features_full.to_csv('annot_features_full.csv')

533
533
(26666,)
26666
534
534
(26666,)
26666
535
535
(26666,)
26666
542
542
(26666,)
26666
543
543
(26666,)
26666
554
554
(26666,)
26666
555
555
(26666,)
26666
559
559
(26666,)
26666
600
600
(13511,)
13511
(1084, 7)
