<a href="https://colab.research.google.com/github/a-v-kolos/Allen_dataset_neuromatch_2025/blob/anastasiia_kolos/data_extraction_attempt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#!pip install allensdk
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.manifold import TSNE

#import brain_observatory_utilities.datasets.optical_physiology.data_formatting as ophys_formatting
#import brain_observatory_utilities.utilities.general_utilities as utilities

from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorOphysProjectCache

pd.set_option('display.max_columns', 500)

#ModuleNotFoundError: No module named 'brain_observatory_utilities'

In [3]:
# Создаем кеш для загрузки данных
cache = VisualBehaviorOphysProjectCache.from_s3_cache(cache_dir="/." )

# Получаем список экспериментов
experiments = cache.get_ophys_experiment_table()

# Выбираем experiment_id (можно взять из таблицы)
experiment_id = experiments.index[0]  # первый эксперимент в таблице

# Загружаем данные эксперимента
dataset = cache.get_behavior_ophys_experiment(experiment_id)

# Получаем traces (dF/F сигналы)
traces = dataset.dff_traces


VisualBehaviorOphysProjectCache.construct_local_manifest()

to avoid needlessly downloading duplicates of data files that did not change between data releases. NOTE: running this method will require hashing every data file you have currently downloaded and could be very time consuming.


/_downloaded_data.json

is not deleted between instantiations of this cache
ophys_session_table.csv: 100%|██████████| 247k/247k [00:00<00:00, 1.88MMB/s] 
behavior_session_table.csv: 100%|██████████| 1.59M/1.59M [00:00<00:00, 10.7MMB/s]
ophys_experiment_table.csv: 100%|██████████| 657k/657k [00:00<00:00, 4.82MMB/s] 
ophys_cells_table.csv: 100%|██████████| 4.28M/4.28M [00:00<00:00, 13.5MMB/s]
	As of AllenSDK version 2.16.0, the latest Visual Behavior Ophys data has been significantly updated from previous releases. Specifically the user will need to update all processing of the stimulus_presentations tables. These tables now include multiple stimulus types delineated by the columns `stimulus_block` and 

In [4]:
experiments.head()

Unnamed: 0_level_0,behavior_session_id,ophys_session_id,ophys_container_id,mouse_id,indicator,full_genotype,driver_line,cre_line,reporter_line,sex,age_in_days,imaging_depth,targeted_structure,targeted_imaging_depth,imaging_plane_group,project_code,session_type,session_number,image_set,behavior_type,passive,experience_level,prior_exposures_to_session_type,prior_exposures_to_image_set,prior_exposures_to_omissions,date_of_acquisition,equipment_name,published_at,isi_experiment_id,file_id
ophys_experiment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
951980471,951520319,951410079,1018028342,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,150,VISp,150,0,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,0
951980473,951520319,951410079,1018028345,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,225,VISp,225,0,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,1
951980475,951520319,951410079,1018028339,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,75,VISp,75,1,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,2
951980479,951520319,951410079,1018028354,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,150,VISl,150,2,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,3
951980481,951520319,951410079,1018028357,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,225,VISl,225,2,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,4


In [5]:
traces.head()

Unnamed: 0_level_0,cell_roi_id,dff
cell_specimen_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1086613265,1080743723,"[0.9365729093551636, 0.5824856162071228, 1.296..."
1086613823,1080743752,"[0.555761456489563, 0.7478731870651245, 0.4397..."
1086619526,1080743754,"[0.2593991160392761, 0.30149346590042114, 0.34..."
1086614149,1080743763,"[0.4112476706504822, 0.1977161169052124, 0.143..."
1086614351,1080743765,"[0.08977238088846207, 0.0746675506234169, 0.26..."


Начнем с мыши 470784

Загрузим все сессии для мыши 470784 из experiments

In [6]:
experiments.index[0]

951980471

Что такое ophys_experiment_id?

In [7]:
cache.get_behavior_ophys_experiment(ophys_experiment_id = 951980471)

core - cached version: 2.6.0-alpha, loaded version: 2.7.0
  self.warn_for_ignored_namespaces(ignored_namespaces)


<allensdk.brain_observatory.behavior.behavior_ophys_experiment.BehaviorOphysExperiment at 0x79f951b67710>

Теперь нужно еще раз посмотреть на маленький датасет и выбрать оттуда одну мышь, посмотреть совпадет ли результат, и если да, то написать функцию с циклом.

И посмотреть потом другие условия сортировки.

In [8]:
index_85 = cache.get_behavior_ophys_experiment(ophys_experiment_id = 994278281)

behavior_ophys_experiment_994278281.nwb: 100%|██████████| 313M/313M [00:09<00:00, 33.2MMB/s]
core - cached version: 2.6.0-alpha, loaded version: 2.7.0
  self.warn_for_ignored_namespaces(ignored_namespaces)


In [9]:
cache.get_ophys_experiment_table()

Unnamed: 0_level_0,behavior_session_id,ophys_session_id,ophys_container_id,mouse_id,indicator,full_genotype,driver_line,cre_line,reporter_line,sex,age_in_days,imaging_depth,targeted_structure,targeted_imaging_depth,imaging_plane_group,project_code,session_type,session_number,image_set,behavior_type,passive,experience_level,prior_exposures_to_session_type,prior_exposures_to_image_set,prior_exposures_to_omissions,date_of_acquisition,equipment_name,published_at,isi_experiment_id,file_id
ophys_experiment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
951980471,951520319,951410079,1018028342,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,150,VISp,150,0,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,0
951980473,951520319,951410079,1018028345,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,225,VISp,225,0,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,1
951980475,951520319,951410079,1018028339,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,75,VISp,75,1,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,2
951980479,951520319,951410079,1018028354,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,150,VISl,150,2,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,3
951980481,951520319,951410079,1018028357,457841,GCaMP6f,Sst-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Sst-IRES-Cre],Sst-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,206,225,VISl,225,2,VisualBehaviorMultiscope,OPHYS_1_images_A,1,A,active_behavior,False,Familiar,0,65,0,2019-09-20 09:59:38.837000+00:00,MESO.1,2021-03-25,848974280,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
993590480,993537991,993512923,991913246,489066,GCaMP6f,Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Vip-IRES-Cre],Vip-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,128,175,VISp,175,,VisualBehaviorTask1B,OPHYS_2_images_B_passive,2,B,passive_viewing,True,Familiar,0,12,2,2019-12-10 12:25:49.549000+00:00,CAM2P.4,2021-03-25,967103425,1917
993862120,993691759,993668836,991913246,489066,GCaMP6f,Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Vip-IRES-Cre],Vip-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,129,175,VISp,175,,VisualBehaviorTask1B,OPHYS_4_images_A,4,A,active_behavior,False,Novel 1,0,0,3,2019-12-11 08:57:44.877000+00:00,CAM2P.4,2021-03-25,967103425,1918
994053909,993959724,993948045,991913246,489066,GCaMP6f,Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Vip-IRES-Cre],Vip-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,130,175,VISp,175,,VisualBehaviorTask1B,OPHYS_5_images_A_passive,5,A,passive_viewing,True,Novel >1,0,1,4,2019-12-12 09:29:20.785000+00:00,CAM2P.4,2021-03-25,967103425,1919
994791582,994710581,994689422,991913246,489066,GCaMP6f,Vip-IRES-Cre/wt;Ai148(TIT2L-GC6f-ICL-tTA2)/wt,[Vip-IRES-Cre],Vip-IRES-Cre,Ai148(TIT2L-GC6f-ICL-tTA2),F,134,175,VISp,175,,VisualBehaviorTask1B,OPHYS_6_images_A,6,A,active_behavior,False,Novel >1,0,2,5,2019-12-16 09:27:10.443000+00:00,CAM2P.4,2021-03-25,967103425,1920


In [10]:
index_85.eye_tracking

Unnamed: 0_level_0,timestamps,cr_area,eye_area,pupil_area,likely_blink,pupil_area_raw,cr_area_raw,eye_area_raw,cr_center_x,cr_center_y,cr_width,cr_height,cr_phi,eye_center_x,eye_center_y,eye_width,eye_height,eye_phi,pupil_center_x,pupil_center_y,pupil_width,pupil_height,pupil_phi
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0,0.37209,140.847391,67495.475720,15170.863357,False,15170.863357,140.847391,67495.475720,323.667788,325.850281,5.683365,7.888482,0.311715,343.045475,308.223660,160.163675,134.140760,-0.089899,321.523907,316.833534,65.090696,69.491264,-0.301629
1,0.38237,142.774751,67795.884093,15603.757400,False,15603.757400,142.774751,67795.884093,323.775806,324.296923,5.819092,7.809915,0.341132,343.785835,308.251036,160.782403,134.219291,-0.093592,320.962085,314.777972,64.747116,70.475742,-0.017751
2,0.38714,143.946381,67377.889962,16009.471184,False,16009.471184,143.946381,67377.889962,324.586499,325.144954,6.134908,7.468663,0.383407,344.142811,308.510901,160.148285,133.919939,-0.093933,321.204512,315.691988,66.065022,71.386084,-0.149651
3,0.42064,151.929212,66921.148605,15856.897584,False,15856.897584,151.929212,66921.148605,324.067674,325.679200,6.358265,7.605938,0.443414,343.493298,308.702761,159.985462,133.147493,-0.104477,321.578053,316.542418,66.241724,71.045107,-0.092668
4,0.45409,,,,True,15983.102863,140.960921,67881.010384,324.151246,323.979645,5.891982,7.615308,0.324863,343.859177,306.932906,,,,321.197549,314.719466,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136000,4533.60697,,,,True,31769.618511,93.882190,64196.552929,325.185995,316.042488,5.188001,5.760144,-0.480468,340.950252,302.765102,,,,335.459320,308.420243,,,
136001,4533.63823,,,,True,32027.457273,109.060863,64683.339091,325.548166,315.637564,5.791112,5.994557,0.741780,341.284667,302.785867,,,,333.852640,308.700430,,,
136002,4533.67172,,,,True,31381.978487,98.000349,65081.291024,326.187558,315.818710,5.859079,5.324127,-0.327992,341.560411,303.003899,,,,335.243262,302.440505,,,
136003,4533.70483,,,,True,30700.206871,92.439630,65669.343841,326.345550,315.749830,5.612945,5.242247,-0.158766,341.919674,303.388081,,,,334.974675,302.610235,,,


In [11]:
0.1227215975522995-0.37209

-0.24936840244770048

In [14]:
index_85.dff_traces

Unnamed: 0_level_0,cell_roi_id,dff
cell_specimen_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1086496689,1080691436,"[0.1227215975522995, -0.03220267966389656, 0.1..."
1086501573,1080691448,"[0.21577566862106323, 0.26856759190559387, 0.0..."
1086500533,1080691462,"[0.20203185081481934, 0.24988459050655365, 0.2..."
1086497438,1080691464,"[0.2895065248012543, 0.010245929472148418, 0.1..."
1086497908,1080691486,"[0.31947633624076843, 0.07463130354881287, 0.2..."
1086501720,1080691501,"[0.446819543838501, 0.4650295376777649, 0.2306..."
1086497129,1080691509,"[0.22304809093475342, 0.18552088737487793, 0.1..."
1086497034,1080691515,"[0.10704667121171951, 0.0, 0.24148842692375183..."
1086496500,1080691522,"[0.4812394976615906, 0.23438869416713715, 0.08..."
1086498071,1080691529,"[0.2774941921234131, 0.04747781902551651, 0.16..."


In [None]:
# Загрузите необходимые таблицы
experiments_table = cache.get_ophys_experiment_table
cells_table = cache.get_ophys_cells_table
ophys_sessions_table = cache.get_ophys_session_table()
behavior_sessions_table = cache.get_behavior_session_table()

# Получите данные о событиях стимула и ответах клеток
# Вам нужно будет выбрать конкретный эксперимент (ophys_experiment_id)
# Например, для первого эксперимента в таблице:
exp_id = experiments_table.index[0]
dataset = cache.get_behavior_ophys_experiment(exp_id)

# Получите таблицу с ответами клеток
cell_responses = dataset.get_cell_specimen_table()

# Получите данные о стимулах
stimulus_table = dataset.stimulus_presentations

# Создайте DataFrame, аналогичный вашему примеру
# Это потребует объединения данных из нескольких таблиц
df = pd.DataFrame({
    'stimulus_presentations_id': stimulus_table.index,
    'cell_specimen_id': cell_responses.index[0],  # Пример для одной клетки
    'trace': [np.array([...])],  # Здесь должны быть фактические данные трейса
    'trace_timestamps': [np.array([...])],  # Таймстемпы для трейса
    'mean_response': stimulus_table['mean_response'],
    'baseline_response': stimulus_table['baseline_response'],
    'image_name': stimulus_table['image_name'],
    'image_index': stimulus_table['image_index'],
    'is_change': stimulus_table['is_change'],
    'omitted': stimulus_table['omitted'],
    'mean_running_speed': stimulus_table['mean_running_speed'],
    'mean_pupil_area': stimulus_table['mean_pupil_area'],
    'response_latency': None,  # Эти данные могут быть в другой таблице
    'rewarded': stimulus_table['rewarded'],
    'ophys_experiment_id': exp_id,
    # Добавьте остальные поля из вашего примера, объединяя данные из разных таблиц
})

# Объедините с метаданными экспериментов
df = df.merge(experiments_table, on='ophys_experiment_id', how='left')

# Объедините с метаданными клеток
df = df.merge(cells_table, on='cell_specimen_id', how='left')

# Выберите только нужные колонки, как в вашем примере
final_columns = [
    'stimulus_presentations_id', 'cell_specimen_id', 'trace', 'trace_timestamps',
    'mean_response', 'baseline_response', 'image_name', 'image_index',
    'is_change', 'omitted', 'mean_running_speed', 'mean_pupil_area',
    'response_latency', 'rewarded', 'ophys_experiment_id', 'imaging_depth',
    'targeted_structure', 'cre_line', 'session_type', 'session_number',
    'mouse_id', 'ophys_session_id', 'ophys_container_id', 'behavior_session_id',
    'full_genotype', 'reporter_line', 'driver_line', 'indicator', 'sex',
    'age_in_days', 'exposure_level'
]

df = df[final_columns]

AttributeError: 'function' object has no attribute 'index'