## the following gathers all data from the 3/25/21 visual behavior release

In [1]:
import pandas as pd
import numpy as np
import visual_behavior.database as db
from allensdk.brain_observatory.behavior.behavior_project_cache import BehaviorProjectCache
import visual_behavior.data_access.loading as loading

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

pd.set_option('display.max_columns', 500)

  from pandas.util.testing import assert_frame_equal


## get all data with existing methods

In [2]:
cache = BehaviorProjectCache.from_lims(manifest=loading.get_manifest_path())
behavior_sessions = cache.get_behavior_session_table()
ophys_experiments = loading.get_filtered_ophys_experiment_table(include_failed_data=True)

Getting behavior-only session data. This might take a while...


## get paths with release data from Wayne (the final word!!!)

In [3]:
paths = {
    'behavior_only-behavior_session_id': '/allen/aibs/technology/waynew/behavior/behavior_only_nwb/20210224_list_of_3021_behavior_ONLY_session_ids_for_release.csv',
    'behavior_ophys-behavior_session_id': '/allen/aibs/technology/waynew/behavior/behavior_only_nwb/20210224_list_of_551_behavior_session_ids_for_released_ophys.csv',
    'behavior_ophys-ophys_session_id': '/allen/aibs/technology/waynew/behavior/behavior_only_nwb/20210224_list_of_551_ophys_session_ids_for_release.csv',
    'behavior_ophys-ophys_experiment_id': '/allen/aibs/technology/waynew/behavior/behavior_only_nwb/20210224_list_of_1165_ophys_experiment_ids_for_release.csv',
}

ids = {key: pd.read_csv(path, header=None, names=[key.split('-')[1]]) for key, path in paths.items()}  

## a dataset is either a behavior-only session, in which case it has only a behavior session ID, or it is an ophys experiment, in which case there is a one to one mapping between behavior session ID and ophys session ID, and a one to many mapping between ophys session id and ophys_experiment ids 

## Therefore, we need only the behavior only behavior session IDs and the behavior/ophys ophys experiment IDs in order to describe the full dataset

## here, each row, or 'dataset' will be one unit: a behavior_session for behavior-only; an ophys_experiment for behavior/ophys

In [4]:
# merge in behavior_sessions and ophys_sessions for the ophys_experiment_ids:
query_string = '''
    select oe.id as ophys_experiment_id, oe.ophys_session_id, bs.id as behavior_session_id
    from ophys_experiments as oe
    join ophys_sessions as os on oe.ophys_session_id = os.id
    join behavior_sessions as bs on bs.foraging_id = os.foraging_id
    where oe.id in {}
'''
db.lims_query(query_string.format(tuple(ids['behavior_ophys-ophys_experiment_id']['ophys_experiment_id'].values)))
ophys_data = db.lims_query(query_string.format(tuple(ids['behavior_ophys-ophys_experiment_id']['ophys_experiment_id'].values)))

# concatenate the behavior-only sessions with the ophys experiments
release_data = pd.concat([
    ids['behavior_only-behavior_session_id'],
    ophys_data,
])

# add a boolean 'is_ophys' based on the existence of the ophys_session_id
release_data['is_ophys'] = False
release_data.loc[release_data[pd.notnull(release_data['ophys_session_id'])].index, 'is_ophys'] = True

# merge in the behavior session columns:
release_data = release_data.merge(
    behavior_sessions.reset_index().drop(columns = ['ophys_session_id']),
    left_on = 'behavior_session_id',
    right_on = 'behavior_session_id',
    how = 'left',
)

# merge in the ophys_experiments columns, avoiding columns that already exist in the behavior_sessions table:
repeat_columns = [column for column in ophys_experiments.columns if column in list(behavior_sessions.reset_index().columns)]
release_data = release_data.merge(
    ophys_experiments.reset_index().drop(columns = repeat_columns),
    left_on = 'ophys_experiment_id',
    right_on = 'ophys_experiment_id',
    how = 'left',
) 

# set the ID colums to be pandas nullable ints 
# (to avoid having them cast to float when some are missing, see https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html)
for column in ['ophys_session_id','ophys_experiment_id']:
    release_data[column] = release_data[column].astype(pd.Int64Dtype())

In [5]:
release_data

Unnamed: 0,behavior_session_id,ophys_experiment_id,ophys_session_id,is_ophys,equipment_name,date_of_acquisition,donor_id,full_genotype,mouse_id,reporter_line,driver_line,sex,age_in_days,foraging_id,session_type,container_id,project_code,container_workflow_state,experiment_workflow_state,session_name,isi_experiment_id,specimen_id,imaging_depth,targeted_structure,published_at,super_container_id,cre_line,session_tags,failure_tags,prior_exposures_to_session_type,prior_exposures_to_image_set,prior_exposures_to_omissions,model_outputs_available,location,session_number
0,742008131,,,True,BEH.F-Box1,2018-08-24 14:51:25.667,722884873,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,403491,[Ai93(TITL-GCaMP6f)],"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,84.0,dcc5955c-2ea7-4408-997f-bb4b48c47e9b,0_gratings_autorewards_15min,,,,,,,,,,,,,,,,,,,,
1,742797917,,,True,BEH.F-Box1,2018-08-27 14:17:50.656,722884873,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,403491,[Ai93(TITL-GCaMP6f)],"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,87.0,d744c587-0130-45cb-92d5-398ae2b6fab8,1_gratings,,,,,,,,,,,,,,,,,,,,
2,743041166,,,True,BEH.F-Box1,2018-08-28 13:26:23.768,722884873,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,403491,[Ai93(TITL-GCaMP6f)],"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,88.0,1ebd5241-41e7-427c-a75c-5d73ce3cfc6e,1_gratings,,,,,,,,,,,,,,,,,,,,
3,743660302,,,True,BEH.F-Box1,2018-08-29 13:20:08.261,722884873,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,403491,[Ai93(TITL-GCaMP6f)],"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,89.0,c32f1613-461c-4136-a215-1ccfc1eb20d2,1_gratings,,,,,,,,,,,,,,,,,,,,
4,744374168,,,True,BEH.F-Box1,2018-08-30 13:00:52.782,722884873,Slc17a7-IRES2-Cre/wt;Camk2a-tTA/wt;Ai93(TITL-G...,403491,[Ai93(TITL-GCaMP6f)],"[Slc17a7-IRES2-Cre, Camk2a-tTA]",F,90.0,5b99aabb-93b3-45d8-826b-c417fc80783d,1_gratings,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4181,1067818154,1068173247,1067794510,True,CAM2P.3,2020-12-03 14:13:14.751,1045521969,Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,538219,[Ai148(TIT2L-GC6f-ICL-tTA2)],[Vip-IRES-Cre],F,175.0,d0698c0a-4233-413d-9601-c5ae7e6ec576,OPHYS_5_images_B_passive,1.064333e+09,VisualBehavior,published,passed,20201203_538219_ophys5,1.048163e+09,1.045523e+09,175.0,VISp,2021-03-25 00:00:00.000000,1.045523e+09,Vip-IRES-Cre,,,0.0,1.0,5.0,False,Vip_VISp_175,5.0
4182,1069251048,1069286452,1069224939,True,CAM2P.3,2020-12-09 14:28:37.020,1045521969,Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,538219,[Ai148(TIT2L-GC6f-ICL-tTA2)],[Vip-IRES-Cre],F,181.0,275d1628-052a-457a-8d59-e4b2eefb17cc,OPHYS_6_images_B,1.064333e+09,VisualBehavior,published,passed,538219_20201209_6imagesBretake,1.048163e+09,1.045523e+09,175.0,VISp,2021-03-25 00:00:00.000000,1.045523e+09,Vip-IRES-Cre,,,1.0,3.0,7.0,False,Vip_VISp_175,6.0
4183,1069254185,1069286458,1069219822,True,CAM2P.4,2020-12-09 14:47:07.577,1050611348,Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,544261,[Ai148(TIT2L-GC6f-ICL-tTA2)],[Vip-IRES-Cre],F,142.0,d04d16e3-c4fc-4c7d-b14e-dc473031e234,OPHYS_5_images_B_passive,1.064333e+09,VisualBehavior,published,passed,544261_20201209_5imagesB_retake,1.053467e+09,1.050612e+09,175.0,VISp,2021-03-25 00:00:00.000000,1.050612e+09,Vip-IRES-Cre,,,1.0,3.0,7.0,False,Vip_VISp_175,5.0
4184,1071017498,1071070929,1070970140,True,CAM2P.4,2020-12-16 15:11:46.249,1050611348,Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,544261,[Ai148(TIT2L-GC6f-ICL-tTA2)],[Vip-IRES-Cre],F,149.0,9f3b2f4d-7cf3-4ce9-9cce-0af776ec81e1,OPHYS_6_images_B,1.064333e+09,VisualBehavior,published,passed,20201216_544261_Session6,1.053467e+09,1.050612e+09,175.0,VISp,2021-03-25 00:00:00.000000,1.050612e+09,Vip-IRES-Cre,,d_prime_peak,1.0,6.0,10.0,False,Vip_VISp_175,6.0


## check lengths by asserting that the number of rows in the `release_data` table is equal to the number of behavior-only behavior session IDs + the number of behavior/ophys ophys experiment IDs

In [6]:
print(len(release_data))

4186


In [7]:
assert len(release_data) == (len(ids['behavior_only-behavior_session_id']) + len(ids['behavior_ophys-ophys_experiment_id']))

## save out the master list

In [8]:
release_data.to_csv('/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/visual_behavior_spring_2021_release_master_list.csv', index=False)