# Example Notebook for Tabulate Behavior Data 

In [1]:
#import relevant packages 
import aopy
from aopy.data import db
import numpy as np
import sys
import pandas as pd
import os
import h5py
from tqdm import tqdm
import time
import datetime
from IPython.display import display, Markdown



In [2]:
# Choose which BMI3D database to query and where its preprocessed files live.
isHuman = False  # set to True to use the 'human' database and its directory

db.BMI3D_DBNAME = 'human' if isHuman else 'default'
preproc_dir = '/data/preprocessed-human' if isHuman else '/data/preprocessed'

In [3]:
# Fetch the example sessions: manual-control entries for subject 'affi'
# within the given date range.
entries = db.lookup_mc_sessions(
    subject='affi',
    date=('2023-05-22', '2023-05-28'),
)
print(entries)

[2023-05-22 11:00:50.263000: affi on manual control task, id=9402, 2023-05-22 11:04:44.251000: affi on manual control task, id=9403, 2023-05-22 11:09:32.745000: affi on manual control task, id=9404, 2023-05-23 10:36:42.160000: affi on manual control task, id=9463, 2023-05-23 10:40:46.670000: affi on manual control task, id=9464, 2023-05-23 10:45:06.441000: affi on manual control task, id=9465, 2023-05-24 11:31:21.675000: affi on manual control task, id=9477, 2023-05-24 11:35:25.062000: affi on manual control task, id=9478, 2023-05-24 11:44:46.034000: affi on manual control task, id=9480, 2023-05-25 10:54:17.201000: affi on manual control task, id=9492, 2023-05-25 10:58:30.750000: affi on manual control task, id=9493, 2023-05-25 11:08:15.730000: affi on manual control task, id=9494, 2023-05-26 11:26:15.494000: affi on manual control task, id=9506, 2023-05-26 11:30:34.792000: affi on manual control task, id=9507, 2023-05-26 11:38:14.622000: affi on manual control task, id=9508, 2023-05-2

In [4]:
# Use the database helper to pull out the details the tabulate functions need
# for every entry: subject, task entry id and date.
subjects, te_ids, dates = db.list_entry_details(entries)
# Each of these will be a tuple with one value per entry.

In [5]:
# Select a single entry to get an idea of what the tabulate function needs for one session.
entry1 = entries[0]
print(entry1)

2023-05-22 11:00:50.263000: affi on manual control task, id=9402


In [6]:
# Load the preprocessed data for the example entry with load_preproc_exp_data.
# Tabulate behavior will call this loader for each entry.
exp_data, exp_metadata = aopy.data.base.load_preproc_exp_data(
    preproc_dir, 'affi', 9402, '2023-05-22', cached=True
)

# exp_data is a dictionary. Its 'events' entry is a structured array with four
# fields per event: timestamp, code, event (name) and data.
# Show the first 20 events.
exp_data['events'][:20]

array([(3.51464, 238, b'TIME_ZERO', 0), (5.02072,  16, b'TARGET_ON', 0),
       (5.0292 ,  80, b'CURSOR_ENTER_TARGET', 0),
       (5.18752,  21, b'TARGET_ON', 5), (5.42884,  32, b'TARGET_OFF', 0),
       (6.0542 ,  85, b'CURSOR_ENTER_TARGET', 5),
       (6.22896,  48, b'REWARD', 0), (6.38772, 239, b'TRIAL_END', 0),
       (6.52052,  16, b'TARGET_ON', 0),
       (6.8042 ,  80, b'CURSOR_ENTER_TARGET', 0),
       (6.96264,  20, b'TARGET_ON', 4), (7.04544,  32, b'TARGET_OFF', 0),
       (7.4624 ,  84, b'CURSOR_ENTER_TARGET', 4),
       (7.63728,  48, b'REWARD', 0), (7.79628, 239, b'TRIAL_END', 0),
       (7.9288 ,  16, b'TARGET_ON', 0),
       (8.3456 ,  80, b'CURSOR_ENTER_TARGET', 0),
       (8.50416,  22, b'TARGET_ON', 6), (8.9454 ,  32, b'TARGET_OFF', 0),
       (9.34588,  86, b'CURSOR_ENTER_TARGET', 6)],
      dtype=[('timestamp', '<f8'), ('code', 'u1'), ('event', 'S32'), ('data', '<u4')])

The tabulate functions use these task codes (integer ID and name) as the input arguments for trial_start, trial_end, reward and penalty in order to create a segment for each trial. `tabulate_behavior_data` can work with either the codes or the data dimension; the choice is made with the optional `event_code_type` argument. An initial task code assignment file (.yaml) can be found in the ~/analyze/aopy/config folder. Most of the wrapper tabulate functions load this file themselves, removing the need for user input on the trial_start & trial_end arguments. 

In [7]:
# Load the task codes (a dictionary mapping each event name to its code or list of codes).
task_codes = aopy.data.load_bmi3d_task_codes() #called without arguments, this loads the task_codes.yaml file 
print(task_codes)
#If a task pairs events with codes differently, a new .yaml file can be made with the correct matching. 

{'CENTER_TARGET_OFF': 32, 'CENTER_TARGET_ON': 16, 'CURSOR_ENTER_CENTER_TARGET': 80, 'CURSOR_ENTER_PERIPHERAL_TARGET': [81, 82, 83, 84, 85, 86, 87, 88], 'DELAY_PENALTY': 66, 'HOLD_PENALTY': 64, 'PAUSE': 254, 'PERIPHERAL_TARGET_ON': [17, 18, 19, 20, 21, 22, 23, 24], 'REWARD': 48, 'TIMEOUT_PENALTY': 65, 'TIME_ZERO': 238, 'TRIAL_END': 239, 'TRIAL_START': 2, 'CURSOR_ENTER_TARGET': 80, 'CURSOR_LEAVE_TARGET': 96, 'OTHER_PENALTY': 79, 'PAUSE_START': 128, 'PAUSE_END': 129}


In [8]:
# Pick the codes that mark the start, end, reward and penalty events of a trial.
# Any event that exists in the task code file can be used here; this example
# segments each trial from center target on to the end of the trial.
trial_start_codes = [task_codes['CENTER_TARGET_ON']]
trial_end_codes = [task_codes['TRIAL_END']]
reward_codes = [task_codes['REWARD']]

# 'FIXATION_PENALTY' is intentionally not included: it is absent from the
# task codes loaded here, so looking it up would fail.
penalty_codes = [
    task_codes[name]
    for name in ('HOLD_PENALTY', 'TIMEOUT_PENALTY', 'DELAY_PENALTY', 'OTHER_PENALTY')
]

In [9]:
# Tabulate the behavior of every selected session into one data frame and show it.
df = aopy.data.tabulate_behavior_data(
    preproc_dir, subjects, te_ids, dates,
    trial_start_codes, trial_end_codes, reward_codes, penalty_codes,
)
display(df)

  0%|          | 0/18 [00:00<?, ?it/s]

Unnamed: 0,subject,te_id,date,event_codes,event_times,reward,penalty
0,affi,9402,2023-05-22,"[16, 80, 21, 32, 85, 48, 239]","[5.02072, 5.0292, 5.18752, 5.42884, 6.0542, 6....",True,False
1,affi,9402,2023-05-22,"[16, 80, 20, 32, 84, 48, 239]","[6.52052, 6.8042, 6.96264, 7.04544, 7.4624, 7....",True,False
2,affi,9402,2023-05-22,"[16, 80, 22, 32, 86, 48, 239]","[7.9288, 8.3456, 8.50416, 8.9454, 9.34588, 9.5...",True,False
3,affi,9402,2023-05-22,"[16, 80, 17, 32, 81, 48, 239]","[9.86216, 10.09564, 10.25408, 10.40384, 10.829...",True,False
4,affi,9402,2023-05-22,"[16, 80, 23, 32, 87, 48, 239]","[11.36216, 11.67904, 11.83772, 12.17996, 12.64...",True,False
...,...,...,...,...,...,...,...
3094,affi,9522,2023-05-27,"[16, 80, 21, 66, 239]","[380.06828, 380.17668, 380.3342, 380.92856, 38...",False,True
3095,affi,9522,2023-05-27,"[16, 80, 21, 32, 85, 48, 239]","[381.78492, 381.9602, 382.11868, 382.70168, 38...",True,False
3096,affi,9522,2023-05-27,"[16, 80, 24, 32, 88, 48, 239]","[395.15992, 395.89352, 396.05172, 396.21012, 3...",True,False
3097,affi,9522,2023-05-27,"[16, 80, 20, 32, 84, 48, 239]","[397.32668, 397.40176, 397.56208, 398.03492, 3...",True,False


## Working with a Task without an Existing Wrapper Function
Many tasks already have existing functions that wrap around the core tabulate_behavior function and add task relevant information to that table (for example: tabulate_center_out). In the event that there is a new task structure, tabulate_behavior_data can be used to construct an initial data frame. Task relevant columns can then be added as necessary. 

In this example, the task involved moving a 3D cursor (a small sphere) into a series of larger target spheres that appeared at random locations (x, y, z) in the world. 

In [10]:
# Switch to the human database and its preprocessed directory, then look up
# the example session and extract its subject, task entry id and date.
preproc_dir = '/data/preprocessed-human'
db.BMI3D_DBNAME = 'human'
entry = db.lookup_sessions(id=1021)
print(entry)
subjects, te_ids, dates = db.list_entry_details(entry)

[2024-12-03 11:52:20.714869: Marios on manual control task, id=1021]


In [11]:
# Load the session file to see which events and codes this task produces.
subject_hm, id_hm, date_hm = 'Marios', 1021, '2024-12-03'
exp_data, exp_metadata = aopy.data.base.load_preproc_exp_data(
    preproc_dir, subject_hm, id_hm, date_hm, cached=True
)
# Show the first 20 events.
exp_data['events'][:20]

array([( 0.43085592, 238, b'TIME_ZERO', 0),
       ( 0.53287636,  16, b'TARGET_ON', 0),
       ( 3.33419922,  80, b'CURSOR_ENTER_TARGET', 0),
       ( 3.48965828,  64, b'HOLD_PENALTY', 0),
       ( 4.50075175, 239, b'TRIAL_END', 0),
       ( 4.51215393,  16, b'TARGET_ON', 0),
       ( 5.37907647,  80, b'CURSOR_ENTER_TARGET', 0),
       ( 5.56697736,  64, b'HOLD_PENALTY', 0),
       ( 6.57773676, 239, b'TRIAL_END', 0),
       ( 6.58909489,  16, b'TARGET_ON', 0),
       ( 6.98955023,  80, b'CURSOR_ENTER_TARGET', 0),
       ( 7.22361866,  48, b'REWARD', 0),
       ( 7.73399902, 239, b'TRIAL_END', 0),
       ( 7.84590579,  17, b'TARGET_ON', 1),
       (10.25732578,  81, b'CURSOR_ENTER_TARGET', 1),
       (10.48833736,  48, b'REWARD', 0),
       (10.9888225 , 239, b'TRIAL_END', 0),
       (11.10078932,  18, b'TARGET_ON', 2),
       (13.44481935,  82, b'CURSOR_ENTER_TARGET', 2),
       (13.67770084,  48, b'REWARD', 0)],
      dtype=[('timestamp', '<f8'), ('code', 'u1'), ('event', 'S32'), ('d

In this case, some of the task codes/entries are different from the baseline center out ones. As mentioned above, one option to pair the new codes is to create a new ".yaml" file with the matching events & codes. A second option is to load an example file and create a dictionary of the existing events with corresponding codes. 

### Alternative to Changing Task File

In [12]:
# Toy function that pulls out the codes associated with one event name
def find_codes(targ_array, target_value):
    """Collect the distinct codes recorded for a single event name.

    Args:
        targ_array: iterable of event records laid out as
            (timestamp, code, event name, data).
        target_value: event name to search for; it is compared with the
            third field of every record.

    Returns:
        list: the codes (second field) of the matching records, without
        repeats, in order of first appearance.
    """
    unique_codes = []
    for record in targ_array:
        if record[2] != target_value:
            continue  # record belongs to a different event
        code = record[1]
        if code in unique_codes:
            continue  # this code was already collected
        unique_codes.append(code)
    return unique_codes

In [13]:
# Build a {event name: [codes]} dictionary from the events present in this session.
targ_array = exp_data['events'] # all event records (timestamp, code, event, data)
ev_keys = list(set(targ_array['event'])) # unique event names, still as bytes
keys = [s.decode('utf-8') for s in ev_keys] # same names as str, in the same order as ev_keys

# Pair every decoded name with its raw bytes name explicitly via zip, instead
# of relying on the dictionary's iteration order lining up with ev_keys by index.
sample_task = {ky: find_codes(targ_array, raw_name) for ky, raw_name in zip(keys, ev_keys)}

In [14]:
# The event names are a bit different for this task, so match the start, end,
# reward and penalty roles to the event names found in this session.
reward_codes = sample_task['REWARD']
# sample_task only contains events that actually occurred in the loaded
# session, so a penalty type that never happened has no key. Fall back to an
# empty list instead of raising KeyError.
penalty_codes = sample_task.get('HOLD_PENALTY', []) + sample_task.get('DELAY_PENALTY', [])
trial_end_codes = sample_task['TRIAL_END']
trial_start_codes = sample_task['TARGET_ON']

In [15]:
# Tabulate the behavior of the selected session with the codes chosen above and show the table.
df_beh = aopy.data.bmi3d.tabulate_behavior_data(
    preproc_dir, subjects, te_ids, dates,
    trial_start_codes, trial_end_codes, reward_codes, penalty_codes,
)
display(df_beh)

  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,subject,te_id,date,event_codes,event_times,reward,penalty
0,Marios,1021,2024-12-03,"[16, 80, 64, 239]","[0.5328763616271317, 3.334199216682464, 3.4896...",False,True
1,Marios,1021,2024-12-03,"[16, 80, 64, 239]","[4.5121539337560534, 5.37907646689564, 5.56697...",False,True
2,Marios,1021,2024-12-03,"[16, 80, 48, 239]","[6.589094891678542, 6.9895502319559455, 7.2236...",True,False
3,Marios,1021,2024-12-03,"[17, 81, 48, 239]","[7.8459057859145105, 10.25732578476891, 10.488...",True,False
4,Marios,1021,2024-12-03,"[18, 82, 48, 239]","[11.10078932158649, 13.444819350726902, 13.677...",True,False
...,...,...,...,...,...,...,...
95,Marios,1021,2024-12-03,"[29, 93, 48, 239]","[277.91298533789814, 279.2335839876905, 279.46...",True,False
96,Marios,1021,2024-12-03,"[29, 93, 48, 239]","[280.08859699079767, 282.2669101296924, 282.49...",True,False
97,Marios,1021,2024-12-03,"[29, 93, 48, 239]","[283.1231632786803, 286.4781783237122, 286.700...",True,False
98,Marios,1021,2024-12-03,"[29, 93, 64, 239]","[287.32312778476626, 287.70038594165817, 287.7...",False,True


It might also be useful to add some task-relevant data. In this example, we add the unique target location for each trial. From the task structure, each unique target location has a different value in the data field of its "CURSOR_ENTER_TARGET" event. Using these values as an index links each trial to its specific target. 

In [16]:
# Map unique target locations onto each trial.
# Boolean mask selecting the CURSOR_ENTER_TARGET events; a single vectorized
# comparison replaces the per-event Python loop and also yields a boolean
# mask when there are no events at all.
idx_bool = exp_data['events']['event'] == b'CURSOR_ENTER_TARGET'
targ_idx = exp_data['events']['data'][idx_bool]
# Use the get_target_locations function to get (x,y,z) coordinates for each target.
# The session identifiers defined when the file was loaded are reused here
# rather than repeating the literal subject, id and date.
target_loc = aopy.data.bmi3d.get_target_locations(preproc_dir,
                                                  subject_hm, id_hm, date_hm, targ_idx)

In [17]:
# Add the target information to the data frame.
# The columns are assigned row by row, so there must be exactly one target
# index per tabulated trial. Check that up front so a mismatch is reported
# with a clear message.
if len(targ_idx) != len(df_beh):
    raise ValueError(
        f"Found {len(targ_idx)} target indices for {len(df_beh)} trials; "
        "expected exactly one per trial."
    )
df_beh['target_idx'] = targ_idx
df_beh['target_location'] = target_loc.tolist()
display(df_beh)

Unnamed: 0,subject,te_id,date,event_codes,event_times,reward,penalty,target_idx,target_location
0,Marios,1021,2024-12-03,"[16, 80, 64, 239]","[0.5328763616271317, 3.334199216682464, 3.4896...",False,True,0,"[3.7882, 0.2797, -9.864]"
1,Marios,1021,2024-12-03,"[16, 80, 64, 239]","[4.5121539337560534, 5.37907646689564, 5.56697...",False,True,0,"[3.7882, 0.2797, -9.864]"
2,Marios,1021,2024-12-03,"[16, 80, 48, 239]","[6.589094891678542, 6.9895502319559455, 7.2236...",True,False,0,"[3.7882, 0.2797, -9.864]"
3,Marios,1021,2024-12-03,"[17, 81, 48, 239]","[7.8459057859145105, 10.25732578476891, 10.488...",True,False,1,"[-0.2369, -6.2414, -0.1721]"
4,Marios,1021,2024-12-03,"[18, 82, 48, 239]","[11.10078932158649, 13.444819350726902, 13.677...",True,False,2,"[8.154, 9.7902, -9.5683]"
...,...,...,...,...,...,...,...,...,...
95,Marios,1021,2024-12-03,"[29, 93, 48, 239]","[277.91298533789814, 279.2335839876905, 279.46...",True,False,70,"[-8.8936, 4.245, -3.5062]"
96,Marios,1021,2024-12-03,"[29, 93, 48, 239]","[280.08859699079767, 282.2669101296924, 282.49...",True,False,71,"[7.4059, -0.8664, -2.3757]"
97,Marios,1021,2024-12-03,"[29, 93, 48, 239]","[283.1231632786803, 286.4781783237122, 286.700...",True,False,72,"[-0.978, 9.7775, 6.0346]"
98,Marios,1021,2024-12-03,"[29, 93, 64, 239]","[287.32312778476626, 287.70038594165817, 287.7...",False,True,73,"[2.6701, 5.7285, -6.5313]"
