In [1]:
import numpy as np
import pandas as pd
import math
import time
import re
import os
from skimage import util
from scipy.io import wavfile
from scipy import signal
from scipy import stats
import xarray as xr

#visualizing results
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Widen pandas' display limits so large annotation tables render without truncation.
_display_options = {
    'display.max_rows': 500,
    'display.max_columns': 500,
    'display.width': 1000,
}
for _option_name, _option_value in _display_options.items():
    pd.set_option(_option_name, _option_value)

Create data frame of annotation info

In [None]:
# Read the annotation table from disk, then keep every row whose
# 'Annotation' value is anything other than 'radar'.
annot_path = "C:/Users/Schindler/Documents/Schindler_Lab/Data/Analysis/Excel files/USV/annot_info_df.csv"
annot_data = pd.read_csv(annot_path)
annot_info = pd.DataFrame(annot_data)
annot_info = annot_info.loc[~annot_info['Annotation'].eq('radar')]

# Report the filtered table's dimensions and preview its first rows.
print(annot_info.shape)
annot_info.head()

Find the path of each netCDF file that corresponds to a wav file with annotated data

In [4]:
# Directory whose entries are listed to build the per-animal netCDF Dataset paths.
netcdf_path = 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets'

In [5]:
# Build the full path of every netCDF Dataset in the dataset directory.
# os.listdir returns entries in arbitrary order and includes every directory
# entry, so sort the listing for a deterministic processing order and keep
# only '.nc' files so stray files are never handed to xr.open_dataset.
files = sorted(os.listdir(netcdf_path))
path_names = [
    netcdf_path + "/" + file_name
    for file_name in files
    if file_name.lower().endswith('.nc')
]

path_names

['C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/533_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/534_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/535_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/542_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/543_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/554_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/555_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/559_xr_Dataset.nc',
 'C:/Users/Schindler/Documents/ProgrammingFun/USV_python/Datasets/600_xr_Dataset.nc']

In [6]:
def create_annot_dataset(path, annot_info, size, seed=None):
    """Split one animal's Dataset into annotated slices and a random sample of the rest.

    Parameters
    ----------
    path : str
        Path to the animal's netCDF file. The first run of three digits in the
        file name is used as the animal ID.
    annot_info : pandas.DataFrame
        Annotation table; must contain 'Animal' and 'time_stamp' columns.
    size : int
        Number of non-annotated slices to draw, without replacement.
    seed : int, optional
        Seed for the random draw. When None (the default) NumPy's global
        random state is used, exactly as before this parameter existed.

    Returns
    -------
    data_yes : xarray.Dataset
        The slices whose label matches one of the animal's annotation
        time stamps, selected in ascending time-stamp order.
    data_no : xarray.Dataset
        `size` randomly chosen slices that are not annotated, selected in the
        order they were drawn (not sorted).

    Raises
    ------
    ValueError
        If no three-digit animal ID is found in the file name, or if `size`
        exceeds the number of non-annotated slices.
    """
    # Get the animal name from the file name only, so digits that happen to
    # appear in a parent directory are never mistaken for the animal ID.
    file_name = os.path.basename(path)
    id_match = re.search(r"\d\d\d", file_name)
    if id_match is None:
        raise ValueError("no three-digit animal ID found in file name: " + repr(file_name))
    name = id_match.group(0)
    print(name)

    # Open the xarray Dataset.
    # NOTE(review): the Dataset is never closed here; the returned selections
    # may still read from the file lazily -- confirm before adding a close().
    data = xr.open_dataset(path)
    print(data['slices'].shape)

    # Time stamps of this animal's annotations, ascending; select those slices.
    yes = annot_info[annot_info['Animal'] == int(name)]['time_stamp'].sort_values().values
    print(yes.shape)
    data_yes = data.sel(slices=yes)

    # Slice labels (time stamps) that are not annotations.
    slice_indexes = data['slices'].values
    no = np.setdiff1d(slice_indexes, yes)
    print(no.shape)
    print(yes.shape[0] + no.shape[0])

    if size > no.shape[0]:
        raise ValueError(
            "cannot draw %d non-annotated slices: only %d are available" % (size, no.shape[0])
        )

    # Draw the random noise sample; a dedicated RandomState is used only when a
    # seed is given, so unseeded calls keep using the global RNG.
    sampler = np.random if seed is None else np.random.RandomState(seed)
    no_short = sampler.choice(no, size, replace=False)
    data_no = data.sel(slices=no_short)

    return data_yes, data_no

In [8]:
# Number of random non-annotated ("noise") slices drawn per animal.
N_NOISE_SLICES = 100
# Name of the data variable read from each netCDF Dataset.
POWER_VAR = '__xarray_dataarray_variable__'
# Seed for NumPy's global RNG so the random noise selection repeats on re-run.
RANDOM_SEED = 0


def _slice_features(dataset):
    """Return (power_sums, spec_purs) with one entry per slice of `dataset`.

    Both results follow the order of ``dataset['slices']``. The sums are taken
    by reducing every dimension except 'slices' rather than via
    ``groupby('slices')``: a groupby result comes back ordered by sorted slice
    label, which does not match the random order of the noise slices and would
    pair each power sum with the wrong time stamp.

    spec_pur is the geometric mean of a slice's values divided by their
    arithmetic mean.
    """
    power = dataset[POWER_VAR]
    reduce_dims = [dim for dim in power.dims if dim != 'slices']
    power_sums = power.sum(dim=reduce_dims).values

    spec_purs = []
    for value in dataset['slices'].values:
        slice_values = power.sel(slices=value).values
        spec_purs.append(stats.gmean(slice_values, axis=None) / slice_values.mean())

    return power_sums, spec_purs


np.random.seed(RANDOM_SEED)

# Per-animal frames are collected in lists and concatenated once at the end;
# DataFrame.append was removed in pandas 2.0 and is quadratic inside a loop.
yes_frames = []
no_frames = []

for path in path_names:

    # Get the animal ID from the file name (first run of three digits).
    name = re.search(r"\d\d\d", os.path.basename(path)).group(0)
    animal = int(name)

    # Create datasets of known-annotation slices and a random selection of noise.
    # (create_annot_dataset prints the animal name and slice counts itself.)
    data_yes, data_no = create_annot_dataset(path, annot_info, N_NOISE_SLICES)

    # Compute power sum and spectral purity for both sets of slices.
    yes_sums, yes_spec_purs = _slice_features(data_yes)
    no_sums, no_spec_purs = _slice_features(data_no)

    # Add computed features to the existing rows of known annotations. The rows
    # are sorted by time_stamp to match the ascending order in which
    # create_annot_dataset selects the annotated slices.
    animal_rows = annot_info[annot_info['Animal'] == animal]
    annot_yes = animal_rows.sort_values(by=['time_stamp']).assign(
        power_sum=yes_sums,
        spec_pur=yes_spec_purs,
    )

    # Create the frame for the randomly selected noise slices. 'Animal' is
    # stored as an int so it has the same type as in the annotated rows.
    annot_no = pd.DataFrame({
        'Animal': animal,
        'Group': animal_rows['Group'].iloc[0],
        'Annotation': 'rand_noise',
        'time_stamp': data_no['slices'].values,
        'power_sum': no_sums,
        'spec_pur': no_spec_purs,
    })

    yes_frames.append(annot_yes)
    no_frames.append(annot_no)

annot_features_yes = pd.concat(yes_frames, ignore_index=True)
annot_features_no = pd.concat(no_frames, ignore_index=True)

# Create and save the combined dataframe of yes and no.
# Note: this concat keeps each frame's own 0-based index, so index labels
# repeat in the combined frame and in the CSV's index column.
annot_features_yes = annot_features_yes.drop(
    columns=['Unnamed: 0', 'Session', 'Begin Time (s)', 'Begin Time (s)_1000']
)
annot_features_full = pd.concat([annot_features_yes, annot_features_no])
print(annot_features_full.shape)

annot_features_full.to_csv('annot_features_full.csv')

533
533
(26666,)
(12,)
(26654,)
26666
534
534
(26666,)
(11,)
(26655,)
26666
535
535
(26666,)
(36,)
(26630,)
26666
542
542
(26666,)
(14,)
(26652,)
26666
543
543
(26666,)
(7,)
(26659,)
26666
554
554
(26666,)
(13,)
(26653,)
26666


  log_a = np.log(a)


555
555
(26666,)
(6,)
(26660,)
26666
559
559
(26666,)
(10,)
(26656,)
26666
600
600
(13511,)
(75,)
(13436,)
13511
(1084, 6)


In [9]:
# Preview the first rows of the combined annotated + random-noise feature table.
annot_features_full.head()

Unnamed: 0,Animal,Group,Annotation,time_stamp,power_sum,spec_pur
0,533,5,low slug,46305.0,32714.904297,0.368722
1,533,5,low slug,149692.5,43037.464844,0.306856
2,533,5,low slug,243157.5,270193.53125,0.062226
3,533,5,low slug,243270.0,63014.449219,0.232295
4,533,5,low multi,295560.0,74933.164062,0.206291
