<a href="https://colab.research.google.com/github/grace3999/USV_Python/blob/colab/Notebooks/0_xr_create_annotations_df.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive, which contains the required files:
# 1) csv of annotation features, 2) netcdf files.
# Everything read or written below lives under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
import numpy as np
import pandas as pd
import os
import re

# Widen pandas' display limits so annotation tables are shown without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# NOTE(review): this silences *all* warnings for the rest of the session,
# including deprecation warnings from pandas — consider narrowing the filter.
import warnings; warnings.simplefilter('ignore')
# Print numpy floats in fixed-point rather than scientific notation.
np.set_printoptions(suppress=True)

In [0]:
def get_dir_list(dir_path):
    """Return the paths of the annotation selection tables stored in a directory.

    Parameters
    ----------
    dir_path : str
        Directory which contains the selection tables of annotations created
        from Raven Soft Lite.

    Returns
    -------
    list of str
        One ``dir_path + "/" + file_name`` entry per regular, non-hidden file,
        sorted by file name so that downstream row order is reproducible.
    """
    path_names = []

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for file_name in sorted(os.listdir(dir_path)):
        # "/" is kept as the separator because get_file_info splits on it.
        full_path = dir_path + "/" + file_name

        # Skip sub-directories and hidden entries (e.g. .ipynb_checkpoints,
        # .DS_Store): they are not selection tables and cannot be parsed.
        if file_name.startswith(".") or not os.path.isfile(full_path):
            continue

        path_names.append(full_path)

    return path_names

In [0]:
#may need to be updated based on file naming scheme
def get_file_info(path_name, order):
    """Extract the animal number and session from a selection-table file path.

    The file name (the part after the last '/') is cut at its first '.', and
    the remaining stem is split on '_' or '-'. Only the first two pieces are
    used, e.g. '100_CPA.Table.1.selections' -> '100' and 'CPA'.

    Parameters
    ----------
    path_name : str
        Path of the annotation selections table, using '/' as separator.
    order : str
        'animal' if the animal number comes first in the file name; any other
        value is treated as "session first".

    Returns
    -------
    tuple of (str, str)
        ``(animal_number, session)``, always in that order.

    Raises
    ------
    ValueError
        If the file-name stem does not contain at least two '_'/'-' separated
        pieces.
    """
    # File name up to its first '.', e.g. '100_CPA'
    stem = path_name.split('/')[-1].split('.')[0]
    parts = re.split('_|-', stem)

    if len(parts) < 2:
        raise ValueError(
            "cannot parse animal number and session from file name '"
            + path_name.split('/')[-1]
            + "': expected two fields separated by '_' or '-'"
        )

    first, second = parts[0:2]
    if order == 'animal':
        animal_number, session = first, second
    else:
        session, animal_number = first, second

    return animal_number, session

In [0]:
def create_animal_df(path_name, animal_number, session, ts_mult):
    """Load one annotation selections table and reduce it to the columns of interest.

    The table is read with ``pd.read_table``. Every row is tagged with the
    given animal number and session, and a ``time_stamp`` column is computed
    by converting 'Begin Time (s)' to milliseconds and flooring it to a
    multiple of ``ts_mult`` (floor division, i.e. rounded down, not to the
    nearest multiple).

    Returns a data frame with the columns 'animal_number', 'session',
    'time_stamp', 'Annotation' and 'High Freq (Hz)', in that order.
    """
    table = pd.read_table(path_name)
    n_rows = len(table)

    # Same animal / session label on every annotation row of this file
    table['animal_number'] = [animal_number] * n_rows
    table['session'] = [session] * n_rows

    # Scratch column: begin time in milliseconds (dropped by the selection below)
    table['Begin Time (s)_1000'] = table['Begin Time (s)'] * 1000
    # Floor to the time-stamp grid defined by ts_mult
    table['time_stamp'] = (table['Begin Time (s)_1000'] // ts_mult) * ts_mult

    kept_columns = ['animal_number', 'session', 'time_stamp', 'Annotation', 'High Freq (Hz)']
    return table[kept_columns]

In [0]:
def create_annot_df(dir_path, order, ts_mult, session_name, save_path):
    """Build one annotation data frame from every selection table in a directory.

    Uses get_dir_list, get_file_info and create_animal_df to load each file in
    ``dir_path`` and stacks the per-file frames into a single data frame, which
    is also written to ``<save_path>/annot_df_<session_name>.csv``.

    Parameters
    ----------
    dir_path : str
        Directory which contains the selection tables of annotations created
        from Raven Soft Lite.
    order : str
        Passed to get_file_info: 'animal' if the animal number is listed first
        in the file names, anything else if the session is listed first.
    ts_mult : float
        Time-stamp multiple passed to create_animal_df.
    session_name : str
        Label used in the name of the saved csv file.
    save_path : str
        Directory the csv file is written to.

    Returns
    -------
    pandas.DataFrame
        The per-file frames concatenated in the order returned by
        get_dir_list. Each file keeps its own row index, so index values
        repeat across files. Empty if ``dir_path`` yields no files.
    """
    # Collect the per-file frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0, and appending inside the loop was quadratic.
    animal_frames = []
    for path in get_dir_list(dir_path):
        animal_number, session = get_file_info(path, order)
        animal_frames.append(create_animal_df(path, animal_number, session, ts_mult))

    # pd.concat raises on an empty list; fall back to an empty frame instead.
    annot_df = pd.concat(animal_frames) if animal_frames else pd.DataFrame()

    # Save as csv, named after session_name
    annot_df.to_csv(str(save_path + '/annot_df_' + str(session_name) + '.csv'))

    return annot_df

In [0]:
# Directories of selection tables on the mounted Drive, one per experiment type.
# NOTE(review): these assume shared drives are exposed under 'Team Drives';
# confirm the folder name against the current Colab mount.
path_CPA = '/content/gdrive/Team Drives/USV_eScience_Incubator/Data/annotation_tables/CPA_pair_tables'
path_homecage = '/content/gdrive/Team Drives/USV_eScience_Incubator/Data/annotation_tables/Homecage_pair_tables'
path_pain = '/content/gdrive/Team Drives/USV_eScience_Incubator/Data/annotation_tables/Pain_tables'

In [0]:
# File names list the animal number before the session (see get_file_info).
order = 'animal'
# Time-stamp grid: begin times (converted to ms) are floored to multiples of this value.
ts_mult=22.5
# Label used in the name of the saved csv (annot_df_<session_name>.csv).
session_name = 'homecage2'
# Directory the combined annotation csv is written to.
save_path = '/content/gdrive/Team Drives/USV_eScience_Incubator/Data/annotation_data_frames'

# Build (and save) the combined annotation frame for the homecage tables.
annot_df = create_annot_df(path_homecage, order, ts_mult, session_name, save_path)
print(annot_df.shape)
annot_df.head()

(69, 5)


Unnamed: 0,animal_number,session,time_stamp,Annotation,High Freq (Hz)
0,527,cagepair,18922.5,high slug,78276.7
1,527,cagepair,24750.0,low slug,13046.1
2,527,cagepair,174352.5,bbc,41988.2
3,527,cagepair,342877.5,bbc,53888.7
4,527,cagepair,393052.5,bbc,50970.9
