Fix nwb tutorial bug #56

Merged 3 commits on Jan 3, 2023
3 changes: 2 additions & 1 deletion CHANGELOG.md
@@ -6,9 +6,10 @@
* Use path relative to the current Zarr file in the definition of links and references to avoid breaking
links when moving Zarr files @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46)
* Fix bugs in requirements defined in setup.py @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46)
* Update dataset used in conversion tutorial, which caused warnings @oruebel [#56](https://github.com/hdmf-dev/hdmf-zarr/pull/56)

### Docs
* Add tutoial illustrating how to create a new NWB file with NWBZarrIO @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46)
* Add tutorial illustrating how to create a new NWB file with NWBZarrIO @oruebel [#46](https://github.com/hdmf-dev/hdmf-zarr/pull/46)
* Add docs for describing the mapping of HDMF schema to Zarr storage @oruebel [#48](https://github.com/hdmf-dev/hdmf-zarr/pull/48)

## 0.1.0
69 changes: 43 additions & 26 deletions docs/gallery/plot_convert_nwb_hdf5.py
@@ -2,60 +2,70 @@
Converting NWB HDF5 files to/from Zarr
======================================

This tutorial illustrates how to convert data between HDF5 and Zarr using
a Neurodata Without Borders (NWB) file from the DANDI data archive as an example.
In this tutorial we will convert our example file from HDF5 to Zarr and then
back again to HDF5.
"""


###############################################################################
# Download a small example file from DANDI
# ----------------------------------------
# Setup
# -----
#
# We first **download a small NWB file** from the DANDI neurophysiology data archive as an example.
# The NWB standard is defined using HDMF and uses the :py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO`
# HDF5 backend from HDMF for storage.

# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_plot_convert_nwb.png'
import os
import shutil
from dandi.dandiapi import DandiAPIClient

dandiset_id = '000207'
filepath = "sub-1/sub-1_ses-1_ecephys+image.nwb" # 5 MB file
dandiset_id = "000009"
filepath = "sub-anm00239123/sub-anm00239123_ses-20170627T093549_ecephys+ogen.nwb" # ~0.5MB file
with DandiAPIClient() as client:
    asset = client.get_dandiset(dandiset_id, 'draft').get_asset_by_path(filepath)
    s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
    filename = os.path.basename(asset.path)
    asset.download(filename)

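For reference, the local file name used throughout the tutorial is simply the basename of the asset's path within the dandiset. A minimal sketch of that step (using the tutorial's example path):

```python
import os

# The local download name is derived from the asset's path in the dandiset;
# this mirrors the `os.path.basename(asset.path)` call above.
asset_path = "sub-anm00239123/sub-anm00239123_ses-20170627T093549_ecephys+ogen.nwb"
filename = os.path.basename(asset_path)
print(filename)  # sub-anm00239123_ses-20170627T093549_ecephys+ogen.nwb
```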
###############################################################################
# Define output settings and clean up old files
# ---------------------------------------------
#
# Next we define the names of the files to generate as part of this tutorial and clean up any
# data from previous executions of this tutorial.

zarr_filename = "test_zarr_" + filename + ".zarr"
hdf_filename = "test_hdf5_" + filename

# Delete our converted HDF5 file from previous runs of this notebook
if os.path.exists(hdf_filename):
    print("Removing %s" % hdf_filename)
    os.remove(hdf_filename)
# Delete our converted Zarr file from previous runs of this notebook
if os.path.exists(zarr_filename):
    print("Removing %s" % zarr_filename)
    shutil.rmtree(zarr_filename)
# Delete our converted HDF5 and Zarr file from previous runs of this notebook
for fname in [zarr_filename, hdf_filename]:
    if os.path.exists(fname):
        print("Removing %s" % fname)
        if os.path.isfile(fname):  # Remove a single file (here the HDF5 file)
            os.remove(fname)
        else:  # remove whole directory and subtree (here the Zarr file)
            shutil.rmtree(fname)

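The cleanup loop above generalizes to a small helper. A standalone sketch (the helper name `remove_path` is ours, not part of the tutorial):

```python
import os
import shutil
import tempfile

def remove_path(path):
    """Remove a file or an entire directory tree if it exists,
    mirroring the tutorial's cleanup loop."""
    if os.path.exists(path):
        if os.path.isfile(path):
            os.remove(path)      # single file (e.g., the HDF5 file)
        else:
            shutil.rmtree(path)  # directory tree (e.g., the Zarr store)

# Demonstrate on a throwaway directory
tmpdir = tempfile.mkdtemp()
remove_path(tmpdir)
print(os.path.exists(tmpdir))  # False
```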
###############################################################################
# Convert the NWB file from HDF5 to Zarr
# --------------------------------------
#
# To convert files between storage backends, we use HDMF's :hdmf-docs:`export <export.html>` functionality.
# As this is an NWB file, we here use the :py:class:`pynwb.NWBHDF5IO` backend for reading the file
# from HDF5 and use the :py:class:`~hdmf_zarr.nwb.NWBZarrIO` backend to export the file to Zarr.

from pynwb import NWBHDF5IO
from hdmf_zarr.nwb import NWBZarrIO

with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io:
    with NWBZarrIO(zarr_filename, mode='w') as export_io:
        export_io.export(src_io=read_io, write_args=dict(link_data=False))
with NWBHDF5IO(filename, 'r', load_namespaces=False) as read_io:  # Create HDF5 IO object for read
    with NWBZarrIO(zarr_filename, mode='w') as export_io:  # Create Zarr IO object for write
        export_io.export(src_io=read_io, write_args=dict(link_data=False))  # Export from HDF5 to Zarr

###############################################################################
# .. note::
#
#    When converting between backends we need to set ``link_data=False`` as linking from Zarr
#    to HDF5 and vice-versa is not supported.
#    When converting between backends we need to set ``link_data=False`` as linking
#    from Zarr to HDF5 (and vice-versa) is not supported.
#
# Read the Zarr file back in
# --------------------------
@@ -72,25 +82,32 @@

###############################################################################
# The main difference is that datasets are now represented by Zarr arrays compared
# to h5py Datasets when reading from HDF5
# to h5py Datasets when reading from HDF5.

print(type(zf.trials['start_time'].data))

###############################################################################
# For illustration purposes, we here show a few columns of the
# :pynwb-docs:`Trials <tutorials/general/plot_timeintervals.html>` table.

print(type(zf.get_acquisition(name='events').data))
zf.trials.to_dataframe()[['start_time', 'stop_time', 'type', 'photo_stim_type']]

###############################################################################
# Convert the Zarr file back to HDF5
# ----------------------------------
#
# Using the same approach as above, we can now convert our Zarr file back to HDF5.

with NWBZarrIO(zarr_filename, mode='r') as read_io:
    with NWBHDF5IO(hdf_filename, 'w') as export_io:
        export_io.export(src_io=read_io, write_args=dict(link_data=False))
with NWBZarrIO(zarr_filename, mode='r') as read_io:  # Create Zarr IO object for read
    with NWBHDF5IO(hdf_filename, 'w') as export_io:  # Create HDF5 IO object for write
        export_io.export(src_io=read_io, write_args=dict(link_data=False))  # Export from Zarr to HDF5

###############################################################################
# Read the new HDF5 file back
# ---------------------------
#
# Now our file has been converted from HDF5 to Zarr and back again to HDF5.
# Here we check that we can still read that file
# Here we check that we can still read that file.

with NWBHDF5IO(hdf_filename, 'r') as hr:
    hf = hr.read()
4 changes: 2 additions & 2 deletions docs/gallery/plot_nwb_zarrio.py
@@ -84,7 +84,7 @@
)

# Add a mock electrical recording acquisition to the NWBFile
raw_data = np.random.randn(50, 4)
raw_data = np.random.randn(50, len(all_table_region))
raw_electrical_series = ElectricalSeries(
    name="ElectricalSeries",
    data=raw_data,
@@ -95,7 +95,7 @@
nwbfile.add_acquisition(raw_electrical_series)

# Add a mock LFP processing result to the NWBFile
lfp_data = np.random.randn(50, 4)
lfp_data = np.random.randn(50, len(all_table_region))
lfp_electrical_series = ElectricalSeries(
    name="ElectricalSeries",
    data=lfp_data,
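The two changes in this file replace the hard-coded `4` with the size of the electrode table region, so the mock data always has one column per selected electrode. A minimal sketch of the invariant, with `n_electrodes` standing in for `len(all_table_region)` from the tutorial:

```python
import numpy as np

# One column of data per electrode in the referenced table region;
# n_electrodes is a stand-in for len(all_table_region) in the tutorial.
n_electrodes = 4
raw_data = np.random.randn(50, n_electrodes)  # 50 time steps x n_electrodes channels
lfp_data = np.random.randn(50, n_electrodes)
assert raw_data.shape[1] == n_electrodes
assert lfp_data.shape == (50, n_electrodes)
```

Tying the shape to the region size keeps the series valid even if the number of mock electrodes changes later.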
3 changes: 1 addition & 2 deletions docs/source/storage.rst
@@ -238,8 +238,7 @@ Object references are stored in attributes as dicts with the following keys:
``zarr_dtype`` is set to ``"object"`` (or ``"region"`` for :ref:`sec-zarr-storage-references-region`)
* ``value``: The value of the object references, i.e., here the :py:class:`~hdmf_zarr.utils.ZarrReference`
dictionary with the ``source`` and ``path`` keys defining the object reference (again, ``source`` is
here the relative path to the target Zarr file, and ``path`` identifies the object within the source
Zarr file).
here the relative path to the target Zarr file, and ``path`` identifies the object within the source Zarr file).

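As a sketch, a serialized object-reference attribute with the key layout described above might look like this (the concrete ``source`` and ``path`` values are hypothetical, for illustration only):

```python
# Hypothetical serialized object reference: ``zarr_dtype`` marks the reference
# type and ``value`` holds the ZarrReference keys described above.
ref_attr = {
    "zarr_dtype": "object",  # "region" would be used for region references
    "value": {
        "source": ".",  # relative path to the target Zarr file (hypothetical)
        "path": "/acquisition/ElectricalSeries",  # object within that file (hypothetical)
    },
}
print(sorted(ref_attr["value"].keys()))  # ['path', 'source']
```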
For example in NWB, the attribute ``ElectricalSeries.electrodes.table`` would be defined as follows:
