In [69]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from os.path import join as pjoin
import sys
import re
from natsort import natsort_keygen
sys.path.append('..')
import circletrack_behavior as ctb

In [170]:
## Set parameters
session_dict = {'mc03': ['Training1', 'Training2', 'Training3', 'Training4', 'Reversal1', 'Reversal2', 'Reversal4', 'Training_Reversal'],
                'mc06': ['Training1', 'Training2', 'Training3', 'Training4', 'Reversal1', 'Reversal2', 'Reversal3', 'Reversal4', 'Training_Reversal'],
                'mc07': ['Training1', 'Training2', 'Training3', 'Training4', 'Reversal1', 'Reversal2', 'Reversal3', 'Reversal4', 'Training_Reversal'],
                'mc09': ['Training1', 'Training2', 'Training3', 'Training4', 'Reversal1', 'Reversal2', 'Reversal3', 'Reversal4', 'Training_Reversal'],
                'mc11': ['Training1', 'Training2', 'Training3', 'Training4', 'Reversal1', 'Reversal2', 'Reversal3', 'Reversal4', 'Training_Reversal']}
behavior_path = '../../EnsembleRemodeling_Resubmission/circletrack_data'  
output_path = '../../EnsembleRemodeling_Resubmission/circletrack_data/output/behav' 
cohort_number = 'cohort1'
mouse_list = ['mc06', 'mc07', 'mc09', 'mc11']   
## Set relative path variable for circletrack behavior data
path = pjoin(behavior_path, 'Data/**/**/**/circle_track.csv')
## Set str2match variable (regex for mouse name)
str2match = '(mc[0-9]+)'
## Create list of files
file_list = ctb.get_file_list(path)
## Loop through file_list to extract mouse name
mouseID = []
for file in file_list:
    mouse = ctb.get_mouse(file, str2match)
    mouseID.append(mouse)
## Combine file_list and mouseID
combined_list = ctb.combine(file_list, mouseID)

In [171]:
for mouse in mouse_list:
    natsort_key = natsort_keygen()
    subset = ctb.subset_combined(combined_list, mouse).reset_index(drop=True)
    subset = sorted(subset, key = natsort_key)
    for id, session in enumerate(session_dict[mouse]):
        df = pd.DataFrame()
        circle_track = pd.read_csv(subset[id])
        circle_track = ctb.crop_data(circle_track)
        circle_track = ctb.normalize_timestamp(circle_track).reset_index(drop=True)
        
        data = {'x': [], 'y': [], 'frame': [], 'water': [], 'lick_port': [], 
                'a_pos': [], 't': [], 'animal': [], 'session': [], 'cohort': []}
        for i in np.arange(0, circle_track.shape[0]):
            data['frame'].append(i)
            data['t'].append(circle_track.loc[i, 'timestamp'])
            data['animal'].append(mouse)
            data['session'].append(session)
            data['cohort'].append(cohort_number)

            if circle_track.loc[i, 'event'] == 'LOCATION':
                data['x'].append(float(re.search(r'X([0-9]+)', circle_track.loc[i, 'data']).group(1)))
                data['y'].append(float(re.search(r'Y([0-9]+)', circle_track.loc[i, 'data']).group(1)))
                data['a_pos'].append(float(re.search(r'A([0-9]+)', circle_track.loc[i, 'data']).group(1)))
                data['lick_port'].append('-1')
                data['water'].append(False)

            elif circle_track.loc[i, 'event'] == 'LICK':
                data['lick_port'].append(re.search('reward([0-9])', circle_track.loc[i, 'data']).group(1))
                data['water'].append(False)
                if circle_track.loc[i-1, 'event'] == 'LOCATION':
                    data['x'].append(float(re.search(r'X([0-9]+)', circle_track.loc[i-1, 'data']).group(1)))
                    data['y'].append(float(re.search(r'Y([0-9]+)', circle_track.loc[i-1, 'data']).group(1)))
                    data['a_pos'].append(float(re.search(r'A([0-9]+)', circle_track.loc[i-1, 'data']).group(1)))
                elif circle_track.loc[i+1, 'event'] == 'LOCATION':
                    data['x'].append(float(re.search(r'X([0-9]+)', circle_track.loc[i+1, 'data']).group(1)))
                    data['y'].append(float(re.search(r'Y([0-9]+)', circle_track.loc[i+1, 'data']).group(1)))
                    data['a_pos'].append(float(re.search(r'A([0-9]+)', circle_track.loc[i+1, 'data']).group(1)))

            elif circle_track.loc[i, 'event'] == 'REWARD':
                data['lick_port'].append(re.search('reward([0-9])', circle_track.loc[i, 'data']).group(1))
                data['water'].append(True)
                if circle_track.loc[i-1, 'event'] == 'LOCATION':
                    data['x'].append(float(re.search(r'X([0-9]+)', circle_track.loc[i-1, 'data']).group(1)))
                    data['y'].append(float(re.search(r'Y([0-9]+)', circle_track.loc[i-1, 'data']).group(1)))
                    data['a_pos'].append(float(re.search(r'A([0-9]+)', circle_track.loc[i-1, 'data']).group(1)))
                elif circle_track.loc[i+1, 'event'] == 'LOCATION':
                    data['x'].append(float(re.search(r'X([0-9]+)', circle_track.loc[i+1, 'data']).group(1)))
                    data['y'].append(float(re.search(r'Y([0-9]+)', circle_track.loc[i+1, 'data']).group(1)))
                    data['a_pos'].append(float(re.search(r'A([0-9]+)', circle_track.loc[i+1, 'data']).group(1)))
            
            elif circle_track.loc[i, 'event'] == 'START':
                if circle_track.loc[i+1, 'event'] == 'LOCATION':
                    data['x'].append(float(re.search(r'X([0-9]+)', circle_track.loc[i+1, 'data']).group(1)))
                    data['y'].append(float(re.search(r'Y([0-9]+)', circle_track.loc[i+1, 'data']).group(1)))
                    data['a_pos'].append(float(re.search(r'A([0-9]+)', circle_track.loc[i+1, 'data']).group(1)))
                elif circle_track.loc[i+1, 'event'] == 'LICK':
                    data['x'].append(float(re.search(r'X([0-9]+)', circle_track.loc[i+2, 'data']).group(1)))
                    data['y'].append(float(re.search(r'Y([0-9]+)', circle_track.loc[i+2, 'data']).group(1)))
                    data['a_pos'].append(float(re.search(r'A([0-9]+)', circle_track.loc[i+2, 'data']).group(1)))
                data['lick_port'].append('-1')
                data['water'].append(False)
                
            elif circle_track.loc[i, 'event'] == 'TERMINATE':
                if circle_track.loc[i-1, 'event'] == 'LOCATION':
                    data['x'].append(float(re.search(r'X([0-9]+)', circle_track.loc[i-1, 'data']).group(1)))
                    data['y'].append(float(re.search(r'Y([0-9]+)', circle_track.loc[i-1, 'data']).group(1)))
                    data['a_pos'].append(float(re.search(r'A([0-9]+)', circle_track.loc[i-1, 'data']).group(1)))
                elif circle_track.loc[i-1, 'event'] == 'LICK':
                    data['x'].append(float(re.search(r'X([0-9]+)', circle_track.loc[i-2, 'data']).group(1)))
                    data['y'].append(float(re.search(r'Y([0-9]+)', circle_track.loc[i-2, 'data']).group(1)))
                    data['a_pos'].append(float(re.search(r'A([0-9]+)', circle_track.loc[i-2, 'data']).group(1)))
                data['lick_port'].append('-1')
                data['water'].append(False)

        df = pd.DataFrame(data)
        trials = ctb.get_trials(df, shift_factor=0, angle_type='radians', counterclockwise=True)
        trials = pd.DataFrame(trials, columns=['trials'])
        lin_position = ctb.linearize_trajectory(df, angle_type='radians', shift_factor=0)
        lin_position = pd.DataFrame(lin_position, columns=['lin_position'])
        df = pd.concat([df, lin_position, trials], axis=1)
        result_path = pjoin(output_path, mouse)
        df.to_feather(pjoin(result_path, '{}_{}.feat'.format(mouse, session)))

ValueError: All arrays must be of the same length

In [None]:
circle_track['frame'] = np.arange(len(circle_track))
data_out = circle_track[circle_track['event'] == 'LOCATION'].copy()
events = circle_track[circle_track['event']!='LOCATION'].copy()
data_out[['x', 'y', 'ang']] = data_out['data'].apply(lambda d: pd.Series(re.search(r'X(?P<x>[0-9]+)Y(?P<y>[0-9]+)A(?P<ang>[0-9]+)', d).groupdict()))
data_out['lick_port'] = -1
data_out['water'] = False
for _, row in events.iterrows():
    ts = row['timestamp']
    idx = data_out.iloc[np.argmin(np.abs(data_out['timestamp'] - ts))].name
    try:
        port = int(row['data'][-1])
    except TypeError:
        continue
    data_out.loc[idx, 'lick_port'] = port
    if row['event'] == 'REWARD':
        data_out.loc[idx, 'water'] = True

In [183]:
any(np.isnan(data['water']))

False

In [189]:
circle_track[circle_track['event'] == 'LICK']

Unnamed: 0,timestamp,event,data
121,3.766312,LICK,reward5
128,3.942840,LICK,reward5
132,4.057535,LICK,reward5
139,4.217106,LICK,reward5
145,4.389156,LICK,reward5
...,...,...,...
62198,1734.882241,LICK,reward4
62203,1735.004918,LICK,reward4
62214,1735.287471,LICK,reward4
62884,1755.057612,LICK,reward3


In [185]:
data

{'x': [310.0,
  310.0,
  314.0,
  316.0,
  319.0,
  321.0,
  324.0,
  325.0,
  326.0,
  326.0,
  328.0,
  329.0,
  329.0,
  329.0,
  329.0,
  328.0,
  328.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  328.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  329.0,
  330.0,
  330.0,
  329.0,
  330.0,
  331.0,
  332.0,
  333.0,
  336.0,
  339.0,
  343.0,
  345.0,
  345.0,
  348.0,
  353.0,
  356.0,
  360.0,
  362.0,
  368.0,
  372.0,
  378.0,
  382.0,
  386.0,
  391.0,
  395.0,
  400.0,
  404.0,
  407.0,
  410.0,
  414.0,
  417.0,
  417.0,
  423.0,
  426.0,
  426.0,
  428.0,
  431.0,
  433.0,
  436.0,
  438.0,
  439.0,
  439.0,
  440.0,
  440.0,
  440.0,
  440.0,
  440.0,
  440.0,
  440.0,
  439.0,
  439.0,
  439.0,
  439.0,
  439.0,
  440.0,
  440.0,
  440.0,
  441.0,
  441.0,
  441.0,
  441.0,
  442.0,
  442.0,
  443.0,
  442.0,
  443.0,
  442.0,
  443.0,
  443.0,
  444.0,
  444.0,
  444.0,
  444.0,
  444.0,
  445