# Observations
---
1. Opening a file with `h5py` or `zarr` is >100,000x faster than opening a file with `pynwb`:

In [2]:
import pathlib

import pynwb
import h5py
import zarr
import lazynwb

datacube_path = pathlib.Path('/data/dynamicrouting_datacube_v0.0.265')
bad_session_ids = ('670181_2023-07-20', '681532_2023-10-19', '712141_2024-06-11')
nwb_paths = list(p for p in datacube_path.glob('nwb/*.nwb') if p.stem not in bad_session_ids)

len(nwb_paths)

245

In [2]:
%%timeit
pynwb.NWBHDF5IO(nwb_paths[0], mode='r').read()

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


2.64 s ± 216 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [3]:
%%timeit
h5py.File(nwb_paths[0], mode='r')

12.6 ms ± 176 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


- **make ephemeral `h5py` or `zarr` instances to access data**
- **pass file paths to functions, not `pynwb` object instances**

---
2. Accessing data directly from an NWB file is not very intuitive:

In [17]:
h5py.File(nwb_paths[0], mode='r').keys()

<KeysViewHDF5 ['acquisition', 'analysis', 'file_create_date', 'general', 'identifier', 'intervals', 'processing', 'session_description', 'session_start_time', 'specifications', 'stimulus', 'timestamps_reference_time', 'units']>

In [21]:
h5py.File(nwb_paths[0], mode='r')["/general/subject/subject_id"].asstr()[()]

'644866'

In [33]:
h5py.File(nwb_paths[0], mode='r')["/processing/behavior/licks/timestamps"][:]

array([1079.20388, 1080.70636, 1080.86242, ..., 4468.20187, 4468.30388,
       4468.3729 ], shape=(2707,))

In [26]:
h5py.File(nwb_paths[0], mode='r')["/units"]["spike_times"].shape

(33040040,)

In [25]:
h5py.File(nwb_paths[0], mode='r')["/units"]["spike_times_index"].shape

(2652,)

- **provide functions to make common data access easier**

In [6]:
lazynwb.get_timeseries(nwb_paths[0], 'licks').timestamps[:]

array([1079.20388, 1080.70636, 1080.86242, ..., 4468.20187, 4468.30388,
       4468.3729 ], shape=(2707,))

In [6]:
lazynwb.get_metadata_df(nwb_paths[:10], as_polars=True)

Getting metadata:   0%|                                | 0/10 [00:00<?, ?file/s]

Getting metadata: 100%|███████████████████████| 10/10 [00:00<00:00, 13.48file/s]


identifier,session_start_time,session_id,session_description,experiment_description,experimenter,lab,institution,related_publications,keywords,notes,data_collection,surgery,pharmacology,virus,source_script,source_script_file_name,age,age__reference,description,genotype,sex,species,subject_id,weight,strain,date_of_birth,_nwb_path
str,"datetime[μs, UTC]",str,str,str,str,str,str,null,list[str],str,null,null,null,null,str,null,str,null,null,str,str,str,str,null,str,"datetime[μs, UTC]",str
"""89dd7188-7ff6-40db-a3f0-ed1286…",2024-10-11 18:34:19 UTC,"""741137_2024-10-11""","""ecephys session (day 4) with b…","""visual-auditory task-switching…","""Vayle Lafehr""","""NP3""","""Neural Circuits & Behavior | M…",,"[""task"", ""sync"", … ""behavior_day_65""]",,,,,,"""https://raw.githubusercontent.…",,"""P206D""",,,"""wt/wt""","""M""","""Mus musculus""","""741137""",,,2024-03-19 07:00:00 UTC,"""/data/dynamicrouting_datacube_…"
"""5a1de65a-9aeb-4e7a-9e2d-858f19…",2023-12-06 21:03:34 UTC,"""686176_2023-12-06""","""ecephys session (day 3) with b…","""visual-auditory task-switching…","""Hannah Cabasco""","""NP3""","""Neural Circuits & Behavior | M…",,"[""issues"", ""task"", … ""behavior_day_79""]","""; https://github.com/AllenInst…",,,,,"""https://raw.githubusercontent.…",,"""P194D""",,,"""VGAT-ChR2-YFP/wt""","""M""","""Mus musculus""","""686176""",,"""VGAT-ChR2-YFP(ND)""",2023-05-26 07:00:00 UTC,"""/data/dynamicrouting_datacube_…"
"""20bdd0ae-5056-4eb2-bfff-e539be…",2024-03-04 21:21:35 UTC,"""702136_2024-03-04""","""ecephys session (day 1) with b…","""visual-auditory task-switching…","""Vayle Lafehr""","""NP3""","""Neural Circuits & Behavior | M…",,"[""task"", ""sync"", … ""behavior_day_49""]",,,,,,"""https://raw.githubusercontent.…",,"""P180D""",,,"""wt/wt""","""F""","""Mus musculus""","""702136""",,"""C57BL6J(NP)""",2023-09-06 07:00:00 UTC,"""/data/dynamicrouting_datacube_…"
"""17ccc163-84d9-4c7d-8654-1505c1…",2025-01-16 22:01:37 UTC,"""744279_2025-01-16""","""ecephys session (day 4) withou…","""visual-auditory task-switching…","""Hannah Cabasco""","""NP3""","""Neural Circuits & Behavior | M…",,"[""task"", ""sync"", … ""behavior_day_86""]",,,,,,"""https://raw.githubusercontent.…",,"""P236D""",,,"""Sst-IRES-Cre/wt;Ai32(RCL-ChR2(…","""M""","""Mus musculus""","""744279""",,"""Sst-IRES-Cre;Ai32""",2024-05-25 07:00:00 UTC,"""/data/dynamicrouting_datacube_…"
"""b006cedb-8235-4f62-8adb-291c62…",2024-04-22 20:15:24 UTC,"""706401_2024-04-22""","""ecephys session (day 1) with b…","""visual-auditory task-switching…","""Hannah Cabasco""","""NP2""","""Neural Circuits & Behavior | M…",,"[""task"", ""sync"", … ""behavior_day_64""]",,,,,,"""https://raw.githubusercontent.…",,"""P199D""",,,"""Sst-IRES-Cre/wt;Ai32(RCL-ChR2(…","""M""","""Mus musculus""","""706401""",,"""Sst-IRES-Cre;Ai32""",2023-10-06 07:00:00 UTC,"""/data/dynamicrouting_datacube_…"
"""ac84d7bc-0546-4dfb-9048-d7156c…",2023-12-12 21:04:37 UTC,"""676909_2023-12-12""","""ecephys session (day 2) with b…","""visual-auditory task-switching…","""Ethan Mcbride""","""NP3""","""Neural Circuits & Behavior | M…",,"[""task"", ""sync"", … ""behavior_day_34""]","""; post-task Spontaneous & Opto…",,,,,"""https://raw.githubusercontent.…",,"""P257D""",,,"""Pvalb-IRES-Cre/wt;Ai32(RCL-ChR…","""M""","""Mus musculus""","""676909""",,"""Pvalb-IRES-Cre;Ai32""",2023-03-30 07:00:00 UTC,"""/data/dynamicrouting_datacube_…"
"""21dc8502-334f-47e8-ac94-5946c9…",2024-12-06 19:06:17 UTC,"""743199_2024-12-06""","""ecephys session (day 4) with b…","""visual-auditory task-switching…","""Vayle Lafehr""","""NP3""","""Neural Circuits & Behavior | M…",,"[""task"", ""sync"", … ""behavior_day_72""]",,,,,,"""https://raw.githubusercontent.…",,"""P202D""",,,"""VGAT-ChR2-YFP/wt""","""F""","""Mus musculus""","""743199""",,"""VGAT-ChR2-YFP""",2024-05-18 07:00:00 UTC,"""/data/dynamicrouting_datacube_…"
"""da9bcf31-6ddf-4bae-bb99-836e92…",2023-02-08 01:08:15 UTC,"""644866_2023-02-07""","""ecephys session (day 1) with b…","""visual-auditory task-switching…","""Jackie Kuyat""","""NP3""","""Neural Circuits & Behavior | M…",,"[""task"", ""sync"", … ""behavior_day_50""]",,,,,,"""https://github.com/samgale/Dyn…",,"""P189D""",,,"""wt/wt""","""F""","""Mus musculus""","""644866""",,"""C57BL6J(VB)""",2022-08-02 07:00:00 UTC,"""/data/dynamicrouting_datacube_…"
"""e05c0cd9-6ead-482b-bb71-be749f…",2024-09-20 17:23:33 UTC,"""733891_2024-09-20""","""ecephys session (day 5) with b…","""visual-auditory task-switching…","""Hannah Cabasco""","""NP3""","""Neural Circuits & Behavior | M…",,"[""task"", ""sync"", … ""behavior_day_50""]",,,,,,"""https://raw.githubusercontent.…",,"""P175D""",,,"""Vip-IRES-Cre/wt;Ai32(RCL-ChR2(…","""F""","""Mus musculus""","""733891""",,"""Vip-IRES-Cre;Ai32""",2024-03-29 07:00:00 UTC,"""/data/dynamicrouting_datacube_…"
"""e20517b4-9c39-4f70-baec-0e4f4f…",2024-08-07 19:03:44 UTC,"""713655_2024-08-07""","""ecephys session (day 3) with b…","""visual-auditory task-switching…","""Vayle Lafehr""","""NP3""","""Neural Circuits & Behavior | M…",,"[""task"", ""sync"", … ""behavior_day_105""]",,,,,,"""https://raw.githubusercontent.…",,"""P258D""",,,"""Sst-IRES-Cre/wt;Ai32(RCL-ChR2(…","""M""","""Mus musculus""","""713655""",,"""Sst-IRES-Cre;Ai32""",2023-11-23 08:00:00 UTC,"""/data/dynamicrouting_datacube_…"


In [7]:
lazynwb.get_df(nwb_paths[:10], 'units', as_polars=True)

Getting multi-NWB units table: 100%|███████████████████████████████████████████████████| 10/10 [00:02<00:00,  3.71NWB/s]


location,d_prime,sliding_rp_violation,nn_hit_rate,silhouette,firing_range,num_spikes,velocity_below,rp_violations,cluster_id,drift_mad,ccf_ap,snr,decoder_label,rp_contamination,electrode_group_name,ccf_dv,decoder_probability,nn_miss_rate,presence_ratio,unit_id,velocity_above,peak_trough_ratio,num_positive_peaks,repolarization_slope,recovery_slope,drift_ptp,amplitude_cutoff,isi_violations_ratio,is_not_drift,exp_decay,drift_std,peak_to_valley,firing_rate,amplitude_median,activity_drift,isi_violations_count,peak_waveform_index,amplitude,structure,isolation_distance,peak_electrode,id,peak_channel,default_qc,amplitude_cv_median,num_negative_peaks,ccf_ml,electrode_group,half_width,l_ratio,spread,amplitude_cv_range,_nwb_path,_table_path,_table_index
str,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64,f64,str,f64,str,f64,f64,f64,f64,str,f64,f64,i64,f64,f64,f64,f64,f64,bool,f64,f64,f64,f64,f64,f64,f64,i64,f64,str,f64,i64,i64,i64,bool,f64,i64,f64,str,f64,f64,f64,f64,str,str,i64
"""ORBl6a""",4.281373,0.01,0.806,0.141976,26.4,71775.0,,16.0,0,2.430139,3550.0,4.834745,"""sua""",0.006183,"""probeA""",3775.0,0.85,0.007968,1.0,"""644866_2023-02-07_A-0""",,-0.452588,1,740589.671488,-67711.337793,9.695507,0.000079,0.007961,false,0.037873,2.713715,0.00026,18.083617,84.24,0.227792,31.0,0,122.129318,"""ORBl""",66.810535,0,0,0,true,0.193386,1,4200.0,"""<HDF5 object reference>""",0.00014,0.239281,60.0,0.179088,"""/data/dynamicrouting_datacube_…","""/units""",0
"""ORBl6a""",5.418555,0.04,0.816667,0.161293,7.4,11494.0,,1.0,1,2.78308,3550.0,7.657694,"""sua""",0.015136,"""probeA""",3775.0,0.89,0.006677,1.0,"""644866_2023-02-07_A-1""",627.548131,-0.225426,2,447447.547107,-28566.300538,9.732039,0.000217,0.010014,false,0.036428,1.482738,0.000383,2.895898,79.56,0.357914,1.0,1,104.059671,"""ORBl""",83.509683,1,1,1,true,,1,4225.0,"""<HDF5 object reference>""",0.000167,0.046852,80.0,,"""/data/dynamicrouting_datacube_…","""/units""",1
"""ORBl6a""",4.735588,0.09,0.781186,0.076461,12.0,12755.0,,204.0,2,0.0,3500.0,1.7489517,"""noise""",1.0,"""probeA""",3650.0,0.811187,0.001895,1.0,"""644866_2023-02-07_A-2""",,-0.273573,2,73743.189412,-9301.16756,0.0,0.000135,3.61881,true,0.00127,0.0,0.00056,3.213605,23.4,0.091422,445.0,1,50.983919,"""ORBl""",102.313619,13,2,13,false,,2,4200.0,"""<HDF5 object reference>""",0.00039,0.57761,160.0,,"""/data/dynamicrouting_datacube_…","""/units""",2
"""ORBl6a""",2.366996,0.235,0.349567,0.03802,2.88,3625.0,,0.0,3,,3525.0,1.4106961,"""noise""",0.0,"""probeA""",3700.0,0.864925,0.010455,1.0,"""644866_2023-02-07_A-3""",,-1.109059,1,60093.659828,-49562.068985,,0.000161,0.0,false,,,0.00068,0.913314,18.72,0.117015,0.0,5,41.338444,"""ORBl""",77.402025,9,3,9,true,0.708155,2,4200.0,"""<HDF5 object reference>""",0.00049,2.211805,160.0,0.388379,"""/data/dynamicrouting_datacube_…","""/units""",3
"""ORBl6a""",4.207927,0.095,0.551418,0.089935,5.4,11472.0,,7.0,4,2.788944,3525.0,4.443199,"""sua""",0.111805,"""probeA""",3725.0,0.74,0.005888,1.0,"""644866_2023-02-07_A-4""",1159.242801,-0.484782,1,517700.284852,-46607.973599,12.336048,0.000536,0.090475,true,0.021465,1.796561,0.000287,2.890355,60.839996,0.088802,9.0,5,91.774765,"""ORBl""",81.022349,5,4,5,true,0.198013,1,4225.0,"""<HDF5 object reference>""",0.00013,0.589177,120.0,0.089106,"""/data/dynamicrouting_datacube_…","""/units""",4
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""MOs5""",9.822377,,0.54902,0.146138,0.0,28.0,,0.0,271,,3300.0,20.605663,"""mua""",0.0,"""probeF""",2750.0,0.89,0.000534,0.181034,"""741137_2024-10-11_F-271""",686.546356,-0.425881,1,1.8013e6,-285008.501697,,,0.0,true,0.022977,,0.00048,0.004,377.90997,0.0,0.0,7,511.624283,"""MOs""",9.0031e14,1582,1625,46,false,,1,3650.0,"""<HDF5 object reference>""",0.000157,,140.0,,"""/data/dynamicrouting_datacube_…","""/units""",1625
"""MOs5""",2.368705,,0.101499,0.011016,0.8,2115.0,,0.0,272,,3200.0,3.916986,"""sua""",0.0,"""probeF""",2450.0,0.59,0.012975,1.0,"""741137_2024-10-11_F-272""",,-0.422253,1,435215.544868,-55157.82277,,0.000368,0.0,true,0.022351,,0.00044,0.302125,81.899994,0.038318,0.0,7,115.914211,"""MOs""",83.333906,1614,1626,78,true,0.272317,1,3525.0,"""<HDF5 object reference>""",0.000213,1.342152,160.0,0.116382,"""/data/dynamicrouting_datacube_…","""/units""",1626
"""MOs2/3""",1.950759,,0.104294,0.012351,0.6,895.0,,2.0,273,,3150.0,6.5293965,"""mua""",1.0,"""probeF""",2300.0,0.89,0.0082,0.939655,"""741137_2024-10-11_F-273""",462.172812,-0.460746,1,366375.417186,-73014.360392,,0.004339,17.478647,false,0.021992,,0.000637,0.12785,91.259995,0.104911,6.0,8,132.369095,"""MOs""",64.89428,1633,1627,97,false,0.304937,1,3450.0,"""<HDF5 object reference>""",0.00024,5.435848,160.0,0.195755,"""/data/dynamicrouting_datacube_…","""/units""",1627
"""MOs2/3""",2.815457,,0.127976,0.035291,0.2,344.0,,1.0,274,,3150.0,11.141338,"""mua""",1.0,"""probeF""",2300.0,0.78,0.005293,0.844828,"""741137_2024-10-11_F-274""",285.584828,-0.379229,1,662929.484267,-112392.873489,,,19.719039,true,0.024543,,0.000647,0.04914,159.12,0.014199,1.0,8,213.382172,"""MOs""",72.494576,1633,1628,97,false,,1,3450.0,"""<HDF5 object reference>""",0.000197,3.821001,140.0,,"""/data/dynamicrouting_datacube_…","""/units""",1628


**Note: by default, the `units` table does not contain array-type columns**

---
3. Data can be accessed lazily:
- `spike_times` can be loaded on a per-unit basis, across sessions

In [12]:
import polars as pl

(
    lazynwb.get_df(nwb_paths[:20], 'units', as_polars=True)
    .filter(
        pl.col('structure') == 'MOs',
        pl.col('activity_drift') < 0.2,
    )
    .sample(10) # take a random sample of n rows in df
    .pipe(lazynwb.merge_array_column, column_name='spike_times')
    .select('unit_id', 'location', 'spike_times', pl.col('spike_times').list.len().alias('n_spikes'))
)

Getting multi-NWB units table: 100%|███████████████████████████████████████████████████| 20/20 [00:02<00:00,  8.68NWB/s]


unit_id,location,spike_times,n_spikes
str,str,list[f64],u32
"""628801_2022-09-19_F-356""","""MOs2/3""","[213.783567, 589.088796, … 4812.479668]",34
"""636397_2022-09-27_F-177""","""MOs6a""","[17.876565, 17.921632, … 4816.891665]",40397
"""644866_2023-02-07_A-160""","""MOs5""","[244.613945, 244.622112, … 4127.071267]",929
"""644866_2023-02-07_A-350""","""MOs6a""","[194.984813, 230.413784, … 4139.012468]",2715
"""644866_2023-02-07_A-383""","""MOs5""","[186.240237, 188.015365, … 4145.589784]",1740
"""676909_2023-12-12_B-262""","""MOs5""","[20.364423, 20.499456, … 8296.221835]",14046
"""676909_2023-12-12_C-352""","""MOs5""","[65.940278, 70.813431, … 7967.841689]",139
"""676909_2023-12-12_C-635""","""MOs5""","[20.307347, 20.36708, … 8300.602839]",53781
"""741137_2024-10-11_F-230""","""MOs5""","[69.341044, 113.885533, … 7001.00798]",316
"""743199_2024-12-06_E-233""","""MOs5""","[20.886107, 21.189971, … 7026.011129]",39966


**Filter first (sessions, units), then fetch larger data**

---

# Other possible convenience functions / classes

In [None]:
lazynwb.get_spike_times_in_intervals(
    units_df=lazynwb.get_df(nwb_paths[:20], 'units', as_polars=True).sample(10),
    intervals={
        'baseline': (pl.col('quiescent_stop_time') - 1, pl.col('quiescent_stop_time')),
        'response': (pl.col('stim_start_time'), pl.col('stim_start_time') + 1),
    },
    intervals_df='trials',              # can be a dataframe, or the name of a dataframe in the NWBs
    apply_obs_intervals=True,           # intervals that were not recorded have NaN spike counts instead of 0
    keep_only_necessary_cols=True,     # disable to keep all unit and trial info on each row
    as_counts=True,                     # disable to keep spike times
    use_process_pool=True,
)

Getting multi-NWB units table:   0%|                                                            | 0/20 [00:00<?, ?NWB/s]

Getting multi-NWB units table: 100%|███████████████████████████████████████████████████| 20/20 [00:02<00:00,  8.32NWB/s]
Getting spike times in intervals: 100%|████████████████████████████████████████████████| 7/7 [00:00<00:00, 5232.60NWB/s]


Unnamed: 0,_table_index_units,_table_index_trials,baseline,response
0,512,257,2,0
1,512,234,1,0
2,512,374,0,0
3,512,443,0,0
4,512,190,0,0
...,...,...,...,...
5141,722,333,0,0
5142,722,405,0,0
5143,722,271,2,0
5144,722,428,0,0


In [21]:
lazynwb.LazyNWB(nwb_paths[0])

In [23]:
lazynwb.LazyNWB(nwb_paths[0]).session_start_time

datetime.datetime(2023, 2, 7, 17, 8, 15, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=57600)))