diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f4d883c..06066042 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,10 @@ * Use path relative to the current Zarr file in the definition of links and references to avoid breaking links when moving Zarr files @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46) * Fix bugs in requirements defined in setup.py @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46) +* Update dataset used in conversion tutorial, which caused warnings @oruebel [#56](https://github.com/hdmf-dev/hdmf-zarr/pull/56) ### Docs -* Add tutoial illustrating how to create a new NWB file with NWBZarrIO @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46) +* Add tutorial illustrating how to create a new NWB file with NWBZarrIO @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46) * Add docs for describing the mapping of HDMF schema to Zarr storage @oruebel [#48](https://github.com/hdmf-dev/hdmf-zarr/pull/48) ## 0.1.0 diff --git a/docs/gallery/plot_convert_nwb_hdf5.py b/docs/gallery/plot_convert_nwb_hdf5.py index 88780b84..4bfbc9bd 100644 --- a/docs/gallery/plot_convert_nwb_hdf5.py +++ b/docs/gallery/plot_convert_nwb_hdf5.py @@ -2,20 +2,28 @@ Converting NWB HDF5 files to/from Zarr ====================================== +This tutorial illustrates how to convert data between HDF5 and Zarr using +a Neurodata Without Borders (NWB) file from the DANDI data archive as an example. +In this tutorial we will convert our example file from HDF5 to Zarr and then +back again to HDF5. """ ############################################################################### -# Download a small example file from DANDI -# ---------------------------------------- +# Setup +# ----- +# +# We first **download a small NWB file** from the DANDI neurophysiology data archive as an example. +# The NWB standard is defined using HDMF and uses the :py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO` +# HDF5 backend from HDMF for storage. 
# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_plot_convert_nwb.png' import os import shutil from dandi.dandiapi import DandiAPIClient -dandiset_id = '000207' -filepath = "sub-1/sub-1_ses-1_ecephys+image.nwb" # 5 MB file +dandiset_id = "000009" +filepath = "sub-anm00239123/sub-anm00239123_ses-20170627T093549_ecephys+ogen.nwb" # ~0.5MB file with DandiAPIClient() as client: asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(filepath) s3_path = asset.get_content_url(follow_redirects=1, strip_query=True) @@ -23,39 +31,41 @@ asset.download(filename) ############################################################################### -# Define output settings and clean up old files -# --------------------------------------------- -# +# Next we define the names of the files to generate as part of this tutorial and clean up any +# data from previous executions of this tutorial. zarr_filename = "test_zarr_" + filename + ".zarr" hdf_filename = "test_hdf5_" + filename -# Delete our converted HDF5 file from previous runs of this notebook -if os.path.exists(hdf_filename): - print("Removing %s" % hdf_filename) - os.remove(hdf_filename) -# Delete our converted Zarr file from previous runs of this notebook -if os.path.exists(zarr_filename): - print("Removing %s" % zarr_filename) - shutil.rmtree(zarr_filename) +# Delete our converted HDF5 and Zarr file from previous runs of this notebook +for fname in [zarr_filename, hdf_filename]: + if os.path.exists(fname): + print("Removing %s" % fname) + if os.path.isfile(fname): # Remove a single file (here the HDF5 file) + os.remove(fname) + else: # remove whole directory and subtree (here the Zarr file) + shutil.rmtree(fname) ############################################################################### # Convert the NWB file from HDF5 to Zarr # -------------------------------------- # +# To convert files between storage backends, we use HDMF's :hdmf-docs:`export ` functionality. 
+# As this is an NWB file, we here use the :py:class:`pynwb.NWBHDF5IO` backend for reading the file +# from HDF5 and use the :py:class:`~hdmf_zarr.nwb.NWBZarrIO` backend to export the file to Zarr. from pynwb import NWBHDF5IO from hdmf_zarr.nwb import NWBZarrIO -with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io: - with NWBZarrIO(zarr_filename, mode='w') as export_io: - export_io.export(src_io=read_io, write_args=dict(link_data=False)) +with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io: # Create HDF5 IO object for read + with NWBZarrIO(zarr_filename, mode='w') as export_io: # Create Zarr IO object for write + export_io.export(src_io=read_io, write_args=dict(link_data=False)) # Export from HDF5 to Zarr ############################################################################### # .. note:: # -# When converting between backends we need to set ``link_data=False`` as linking from Zarr -# to HDF5 and vice-versa is not supported. +# When converting between backends we need to set ``link_data=False`` as linking +# from Zarr to HDF5 (and vice-versa) is not supported. # # Read the Zarr file back in # -------------------------- @@ -72,25 +82,32 @@ ############################################################################### # The main difference is that datasets are now represented by Zarr arrays compared -# to h5py Datasets when reading from HDF5 +# to h5py Datasets when reading from HDF5. + +print(type(zf.trials['start_time'].data)) + +############################################################################### +# For illustration purposes, we here show a few columns of the +# :pynwb-docs:`Trials ` table. 
-print(type(zf.get_acquisition(name='events').data)) +zf.trials.to_dataframe()[['start_time', 'stop_time', 'type', 'photo_stim_type']] ############################################################################### # Convert the Zarr file back to HDF5 # ---------------------------------- # +# Using the same approach as above, we can now convert our Zarr file back to HDF5. -with NWBZarrIO(zarr_filename, mode='r') as read_io: - with NWBHDF5IO(hdf_filename, 'w') as export_io: - export_io.export(src_io=read_io, write_args=dict(link_data=False)) +with NWBZarrIO(zarr_filename, mode='r') as read_io: # Create Zarr IO object for read + with NWBHDF5IO(hdf_filename, 'w') as export_io: # Create HDF5 IO object for write + export_io.export(src_io=read_io, write_args=dict(link_data=False)) # Export from Zarr to HDF5 ############################################################################### # Read the new HDF5 file back # --------------------------- # # Now our file has been converted from HDF5 to Zarr and back again to HDF5. -# Here we check that we can still read that file +# Here we check that we can still read that file. 
with NWBHDF5IO(hdf_filename, 'r') as hr: hf = hr.read() diff --git a/docs/gallery/plot_nwb_zarrio.py b/docs/gallery/plot_nwb_zarrio.py index 97cddcf7..e86a00a7 100644 --- a/docs/gallery/plot_nwb_zarrio.py +++ b/docs/gallery/plot_nwb_zarrio.py @@ -84,7 +84,7 @@ ) # Add a mock electrical recording acquisition to the NWBFile -raw_data = np.random.randn(50, 4) +raw_data = np.random.randn(50, len(all_table_region)) raw_electrical_series = ElectricalSeries( name="ElectricalSeries", data=raw_data, @@ -95,7 +95,7 @@ nwbfile.add_acquisition(raw_electrical_series) # Add a mock LFP processing result to the NWBFile -lfp_data = np.random.randn(50, 4) +lfp_data = np.random.randn(50, len(all_table_region)) lfp_electrical_series = ElectricalSeries( name="ElectricalSeries", data=lfp_data, diff --git a/docs/source/storage.rst b/docs/source/storage.rst index f27e9380..a336c6de 100644 --- a/docs/source/storage.rst +++ b/docs/source/storage.rst @@ -238,8 +238,7 @@ Object references are stored in a attributes as dicts with the following keys: ``zarr_dtype`` is set to ``"object"`` (or ``"region"`` for :ref:`sec-zarr-storage-references-region`) * ``value``: The value of the object references, i.e., here the py:class:`~hdmf_zarr.utils.ZarrReference` dictionary with the ``source`` and ``path`` keys defining the object reference (again, ``source`` is - here the relative path to the target Zarr file, and ``path`` identifys the object within the source - Zarr file). + here the relative path to the target Zarr file, and ``path`` identifies the object within the source Zarr file). For example in NWB, the attribute ``ElectricalSeries.electrodes.table`` would be defined as follows: