In [1]:
%matplotlib notebook
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [28]:
import pandas as pd
import visual_behavior.database as db
import allensdk.brain_observatory.behavior.behavior_project_cache as bpc

## cache directory; within this cell it is only referenced by the commented-out S3 constructor below
my_cache_dir = '//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/visual_behavior_production_analysis/'

# bc = bpc.VisualBehaviorOphysProjectCache.from_s3_cache(cache_dir=my_cache_dir)
bc = bpc.VisualBehaviorOphysProjectCache.from_lims(data_release_date='2021-03-25')

## sort by mouse, then by acquisition date, so that shifting rows within a mouse yields its previous/next session
behavior_session_table = bc.get_behavior_session_table().sort_values(by = ['mouse_id', 'date_of_acquisition'])

## make a column that assigns an ordered number to each phase (eg. training_0 becomes 0.0, training_1 becomes 0.1, ophys_0 becomes 1.0, etc)
name_map = {'TRAINING': '0', 'OPHYS': '1'}


def session_type_to_ordered(session_type):
    """Convert a session type string into a sortable float.

    The first '_'-separated token selects the integer part through ``name_map``
    and the second token supplies the digits after the decimal point, e.g.
    'TRAINING_1_gratings' -> 0.1 and 'OPHYS_0_...' -> 1.0.

    Raises ``KeyError`` when the first token is not a key of ``name_map``.

    NOTE(review): a two-digit stage number would collide with a one-digit one
    ('OPHYS_10' -> 1.1, the same as 'OPHYS_1') -- confirm no such stages exist.
    """
    parts = session_type.split('_')
    return float(name_map[parts[0]] + '.' + parts[1])


behavior_session_table['session_type_ordered'] = behavior_session_table['session_type'].map(session_type_to_ordered)

## some more columns useful for filtering
## the session-type shifts are done per mouse so that the first/last session of a mouse
## gets NaN instead of silently picking up a session from the neighbouring mouse in the table
behavior_session_table['next_session_type_ordered'] = behavior_session_table.groupby('mouse_id')['session_type_ordered'].shift(-1)
behavior_session_table['next_mouse_id'] = behavior_session_table['mouse_id'].shift(-1)
behavior_session_table['next_session_type'] = behavior_session_table.groupby('mouse_id')['session_type'].shift(-1)
behavior_session_table['previous_session_type'] = behavior_session_table.groupby('mouse_id')['session_type'].shift(1)

## identify sessions that were run out of order:
## the next session of the same mouse has a lower ordered value AND that next session is a training stage (ordered value < 1)
misordered_sessions = behavior_session_table.query('next_session_type_ordered < session_type_ordered and next_session_type_ordered < 1 and mouse_id == next_mouse_id').copy()

## placeholder column, filled in from the PKL files further down
misordered_sessions['pkl_stage'] = None

## get stage from PKL file
def get_pkl_stage(bsid):
    """Return the stage stored in the PKL file of one behavior session.

    Parameters
    ----------
    bsid
        Identifier handed to ``db.get_pkl_path`` to locate the PKL file.

    Returns
    -------
    The value found under ``['items']['behavior']['params']['stage']`` in the
    unpickled data.
    """
    pkl_path = db.get_pkl_path(bsid)
    session_data = pd.read_pickle(pkl_path)
    behavior_params = session_data['items']['behavior']['params']
    return behavior_params['stage']

## look up the PKL stage of every misordered session (the index label is what get_pkl_stage receives)
for session_id in misordered_sessions.index:
    misordered_sessions.at[session_id, 'pkl_stage'] = get_pkl_stage(session_id)

## flag sessions where 'session_type' does not match 'pkl_stage'
stage_differs = misordered_sessions['pkl_stage'] != misordered_sessions['session_type']
misordered_sessions['mismatch'] = stage_differs

## columns to show, in display order
cols = [
    'mouse_id',
    'date_of_acquisition',
    'session_type',
    'previous_session_type',
    'next_session_type',
    'pkl_stage',
    'equipment_name',
    'mismatch',
]

## show only the flagged sessions
misordered_sessions.loc[misordered_sessions['mismatch'], cols]

Unnamed: 0_level_0,mouse_id,date_of_acquisition,session_type,previous_session_type,next_session_type,pkl_stage,equipment_name,mismatch
behavior_session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


In [9]:
## print the flagged sessions as a markdown table
print(misordered_sessions.loc[misordered_sessions['mismatch'], cols].to_markdown())

|   behavior_session_id |   mouse_id | date_of_acquisition     | session_type                      | previous_session_type              | next_session_type                  | pkl_stage                          | equipment_name   | mismatch   |
|----------------------:|-----------:|:------------------------|:----------------------------------|:-----------------------------------|:-----------------------------------|:-----------------------------------|:-----------------|:-----------|
|             863571072 |     435431 | 2019-05-06 13:04:34.653 | OPHYS_5_images_B_passive          | TRAINING_4_images_A_training       | TRAINING_4_images_A_training       | TRAINING_4_images_A_training       | BEH.B-Box5       | True       |
|             863571054 |     449653 | 2019-05-06 12:49:50.929 | OPHYS_7_receptive_field_mapping   | TRAINING_4_images_A_training       | TRAINING_5_images_A_epilogue       | TRAINING_5_images_A_epilogue       | BEH.G-Box5       | True       |
|             863571063 

In [24]:
## keep only the PKL stage of the flagged sessions and relabel it as the corrected session type
df = (
    misordered_sessions
    .loc[misordered_sessions['mismatch'], ['pkl_stage']]
    .rename(columns={'pkl_stage': 'correct_session_type'})
)

## nested dict: column name -> {index label -> value}
df.to_dict()

{'correct_session_type': {863571072: 'TRAINING_4_images_A_training',
  863571054: 'TRAINING_5_images_A_epilogue',
  863571063: 'TRAINING_1_gratings',
  885418521: 'TRAINING_5_images_A_handoff_lapsed',
  902810506: 'TRAINING_3_images_B_10uL_reward',
  914219174: 'TRAINING_5_images_B_handoff_ready'}}

In [20]:
# NOTE(review): the recorded output of this cell has a 0..5 integer index and a
# separate 'behavior_session_id' column, which looks like it came from an
# earlier `df` whose index had been reset -- TODO confirm. With `df` as built
# in the cell above, the session ids are the index labels instead.
df.to_dict()

{'behavior_session_id': {0: 863571072,
  1: 863571054,
  2: 863571063,
  3: 885418521,
  4: 902810506,
  5: 914219174},
 'correct_session_type': {0: 'TRAINING_4_images_A_training',
  1: 'TRAINING_5_images_A_epilogue',
  2: 'TRAINING_1_gratings',
  3: 'TRAINING_5_images_A_handoff_lapsed',
  4: 'TRAINING_3_images_B_10uL_reward',
  5: 'TRAINING_5_images_B_handoff_ready'}}

In [27]:
## manual corrections, keyed by the index label of behavior_session_table:
## index label -> session type that should replace the value in 'session_type'
corrected_session_types = pd.DataFrame({
    'correct_session_type': {
        863571072: 'TRAINING_4_images_A_training',
        863571054: 'TRAINING_5_images_A_epilogue',
        863571063: 'TRAINING_1_gratings',
        885418521: 'TRAINING_5_images_A_handoff_lapsed',
        902810506: 'TRAINING_3_images_B_10uL_reward',
        914219174: 'TRAINING_5_images_B_handoff_ready'
    }
})

## every correction must target an existing row: assigning to a label that is not in the
## index would silently append a new, mostly-empty row instead of correcting anything
unknown_session_ids = corrected_session_types.index.difference(behavior_session_table.index)
if len(unknown_session_ids) > 0:
    raise KeyError(
        'behavior_session_ids missing from behavior_session_table: {}'.format(list(unknown_session_ids))
    )

## overwrite the session types in a single assignment; the right-hand Series is aligned on its index labels
behavior_session_table.loc[corrected_session_types.index, 'session_type'] = (
    corrected_session_types['correct_session_type']
)