In [35]:
%load_ext autoreload
%autoreload 2
import hvneuro
import os

import numcodecs
import h5py
import numpy as np
import shutil
import zarr
import dask.array as da
import xarray as xr
import fsspec
import warnings
import kerchunk
import kerchunk.hdf
import json
import ujson

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
files_to_download = {
    "http://api.brain-map.org/api/v2/well_known_file_download/1026124481": "probe_810755797_lfp.nwb",
    # "http://api.brain-map.org/api/v2/well_known_file_download/1026124479": "probe_810755799_lfp.nwb",
    # "http://api.brain-map.org/api/v2/well_known_file_download/1026124471": "probe_810755801_lfp.nwb",
    # "http://api.brain-map.org/api/v2/well_known_file_download/1026124473": "probe_810755803_lfp.nwb",
    # "http://api.brain-map.org/api/v2/well_known_file_download/1026124475": "probe_810755805_lfp.nwb",
    # "http://api.brain-map.org/api/v2/well_known_file_download/1026124477": "probe_810755807_lfp.nwb",
}

In [24]:
data_dir='~/data/allen/'
data_dir = os.path.expanduser(data_dir)
hvneuro.download_files(files_to_download, data_dir=data_dir)

/Users/droumis/data/allen/probe_810755797_lfp.nwb already exists. Skipping download.


In [39]:
probe_id = "810755797"
data_dir='~/data/allen/'
data_dir = os.path.expanduser(data_dir)
nwb_filepath = os.path.join(data_dir, f"probe_{probe_id}_lfp.nwb")
nwb_ref_filepath = os.path.join(data_dir, "lfp_one_probe_ref.json")
lfp_group_path = f"acquisition/probe_{probe_id}_lfp/probe_{probe_id}_lfp_data"
lfp_data_path = lfp_group_path + "/data"
electrodes_data_path = lfp_group_path + "/electrodes"
time_data_path = lfp_group_path + "/timestamps"

def get_compressor(compression_type, compression_level):
    if compression_type == "gzip":
        return numcodecs.Zlib(compression_level)
    else:
        return None
    
## Create a reference file to the NWB file 
def set_metadata(f, refs, data_path, dimensions):
    ref_path = data_path + "/.zarray"
    if ref_path in refs['refs']:
        ref_str = refs['refs'][ref_path]
        ref_dict = json.loads(ref_str)  # Deserialize the JSON string
        print(ref_path)
        ref_dict['_ARRAY_DIMENSIONS'] = dimensions
        compression_type = f[data_path].compression
        compression_level = f[data_path].compression_opts
        # compressor_info = get_compressor(compression_type, compression_level)
        # Convert compressor_info to a dictionary if it's not None
        if compression_type == "gzip": 
            compressor_dict = {
                'id': 'zlib',
                'level': compression_level,
            }
        else:
            compressor_dict = None


        ref_dict['dtype'] = str(f[data_path].dtype)
        ref_dict['compressor'] = compressor_dict
        ref_str = json.dumps(ref_dict)  # Serialize back to a JSON string
        refs['refs'][ref_path] = ref_str  # Replace the reference in refs
        print(ref_dict)  # Original reference

with fsspec.open(nwb_filepath) as f:
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        h5chunks = kerchunk.hdf.SingleHdf5ToZarr(f, nwb_filepath)
        refs = h5chunks.translate()

# Open the file with h5py to fetch attributes
with h5py.File(nwb_filepath, 'r') as f:
    # Adding _ARRAY_DIMENSIONS, compression, dtype
    set_metadata(f, refs, lfp_data_path, ["time", "channel"])
    set_metadata(f, refs, electrodes_data_path, ["channel"])
    set_metadata(f, refs, time_data_path, ["time"])

with open(nwb_ref_filepath, "wb") as f:
    f.write(ujson.dumps(refs).encode())

fs = fsspec.filesystem("reference", fo=nwb_ref_filepath)
ds = xr.open_dataset(
    fs.get_mapper(),
    engine="zarr",
    group=lfp_group_path, # i think setting the group is one aspect that Ian's approach was missing.. 
    backend_kwargs={"consolidated": False, "mask_and_scale": False}
)

data = ds.lfp[:100, :4]
print("==> data", data.compute())
mean = data.mean().compute()
print("==> mean", mean)

acquisition/probe_810755797_lfp/probe_810755797_lfp_data/data/.zarray
{'chunks': [41859, 1], 'compressor': {'id': 'zlib', 'level': 9}, 'dtype': 'float32', 'fill_value': 0.0, 'filters': None, 'order': 'C', 'shape': [10715666, 93], 'zarr_format': 2, '_ARRAY_DIMENSIONS': ['time', 'channel']}
acquisition/probe_810755797_lfp/probe_810755797_lfp_data/electrodes/.zarray
{'chunks': [93], 'compressor': None, 'dtype': 'int64', 'fill_value': 0, 'filters': None, 'order': 'C', 'shape': [93], 'zarr_format': 2, '_ARRAY_DIMENSIONS': ['channel']}
acquisition/probe_810755797_lfp/probe_810755797_lfp_data/timestamps/.zarray
{'chunks': [10465], 'compressor': {'id': 'zlib', 'level': 9}, 'dtype': 'float64', 'fill_value': 0.0, 'filters': None, 'order': 'C', 'shape': [10715666], 'zarr_format': 2, '_ARRAY_DIMENSIONS': ['time']}


ValueError: conflicting sizes for dimension 'phony_dim_0': length 93 on 'electrodes' and length 10715666 on {'phony_dim_0': 'data', 'phony_dim_1': 'data'}