Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve html representation of datasets #1100

Open
wants to merge 22 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -14,6 +14,7 @@
- Unwrap `TermSetWrapper` within the builder to support different backends more efficiently. @mavaylon1 [#1070](https://github.com/hdmf-dev/hdmf/pull/1070)
- Added docs page that lists limitations of support for the HDMF specification language. @rly [#1069](https://github.com/hdmf-dev/hdmf/pull/1069)
- Added warning when using `add_row` or `add_column` to add a ragged array to `DynamicTable` without an index parameter. @stephprince [#1066](https://github.com/hdmf-dev/hdmf/pull/1066)
- Improve HTML representation of data in `Containers`. @h-mayorquin [#1100](https://github.com/hdmf-dev/hdmf/pull/1100)

## HDMF 3.12.2 (February 9, 2024)

Expand Down
86 changes: 78 additions & 8 deletions src/hdmf/container.py
Expand Up @@ -711,7 +711,9 @@ def _generate_html_repr(self, fields, level=0, access_code="", is_field=False):
for index, item in enumerate(fields):
access_code += f'[{index}]'
html_repr += self._generate_field_html(index, item, level, access_code)
elif isinstance(fields, np.ndarray):
elif isinstance(fields, (np.ndarray, h5py.Dataset)):
html_repr += self._generate_array_html(fields, level)
elif hasattr(fields, "store") and hasattr(fields, "shape"): # Duck typing for zarr array
html_repr += self._generate_array_html(fields, level)
else:
pass
Expand All @@ -728,18 +730,22 @@ def _generate_field_html(self, key, value, level, access_code):
return f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"' \
f' title="{access_code}">{key}: </span><span class="field-value">{value}</span></div>'

if hasattr(value, "generate_html_repr"):
if isinstance(value, (np.ndarray, h5py.Dataset)):
html_content = self._generate_array_html(value, level + 1)
elif hasattr(value, "store") and hasattr(value, "shape"): # Duck typing for zarr array
html_content = self._generate_array_html(value, level + 1)
elif hasattr(value, "generate_html_repr"):
html_content = value.generate_html_repr(level + 1, access_code)

elif hasattr(value, '__repr_html__'):
stephprince marked this conversation as resolved.
Show resolved Hide resolved
html_content = value.__repr_html__()

elif hasattr(value, "fields"):
elif hasattr(value, "fields"): # Note that h5py.Dataset has a fields attribute so there is an implicit order
html_content = self._generate_html_repr(value.fields, level + 1, access_code, is_field=True)
elif isinstance(value, (list, dict, np.ndarray)):
html_content = self._generate_html_repr(value, level + 1, access_code, is_field=False)
else:
html_content = f'<span class="field-key">{value}</span>'


html_repr = (
f'<details><summary style="display: list-item; margin-left: {level * 20}px;" '
f'class="container-fields field-key" title="{access_code}"><b>{key}</b></summary>'
Expand All @@ -749,10 +755,74 @@ def _generate_field_html(self, key, value, level, access_code):

return html_repr


def _generate_array_html(self, array, level):
    """Generate an HTML summary for a NumPy array, h5py Dataset, or Zarr array.

    The summary is a heading naming the backend, a two-column table of
    backend information and, for NumPy arrays and small datasets, the
    values themselves.

    :param array: array-like object exposing ``shape`` and ``dtype`` plus
        either ``nbytes`` or ``size``
    :param level: nesting depth; each level indents the block by 20px
    :return: the HTML string, a single ``<div class="container-fields">``
    """
    # Function-scope import so this method does not depend on a module-level import.
    from html import escape

    def convert_bytes_to_str(bytes_size):
        """Format a byte count using binary (1024-based) unit suffixes."""
        suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
        i = 0
        while bytes_size >= 1024 and i < len(suffixes) - 1:
            bytes_size /= 1024.
            i += 1
        return f"{bytes_size:.2f} {suffixes[i]}"

    def to_html_text(value):
        """Stringify ``value`` and escape ``&``, ``<`` and ``>``.

        Values such as the dtype ``<U3`` or string data containing ``<``
        would otherwise be parsed as markup. Quotes are left alone because
        the text is only ever placed in element content, never in attributes.
        """
        return escape(str(value), quote=False)

    # Generates an HTML report for the backend info, inspired by the zarr info HTML representation
    def html_table(item_dicts) -> str:
        rows = "".join(
            f"<tr>"
            f'<th style="text-align: left">{to_html_text(k)}</th>'
            f'<td style="text-align: left">{to_html_text(v)}</td>'
            f"</tr>"
            for k, v in item_dicts.items()
        )
        return f'<table class="data-info"><tbody>{rows}</tbody></table>'

    if hasattr(array, "nbytes"):  # TODO: Remove this after h5py minimal version is larger than 3.0
        array_size_in_bytes = array.nbytes
    else:
        array_size_in_bytes = array.size * array.dtype.itemsize
    array_size_repr = convert_bytes_to_str(array_size_in_bytes)
    basic_array_info_dict = {"shape": array.shape, "dtype": array.dtype, "Array size": array_size_repr}

    # Exactly one branch selects the heading and table content, so both names
    # are always bound before they are used below.
    if isinstance(array, np.ndarray):
        head = "NumPy Array"
        backend_info_dict = basic_array_info_dict
    elif isinstance(array, h5py.Dataset):
        hdf5_dataset = array
        compressed_size = hdf5_dataset.id.get_storage_size()
        uncompressed_size = array_size_in_bytes
        # A storage size of 0 would make the ratio a division by zero.
        compression_ratio = uncompressed_size / compressed_size if compressed_size != 0 else "undefined"

        head = "HDF5 Dataset"
        hdf5_info_dict = {
            "chunks": hdf5_dataset.chunks,
            "compression": hdf5_dataset.compression,
            "compression_opts": hdf5_dataset.compression_opts,
            "compression_ratio": compression_ratio,
        }
        backend_info_dict = {**basic_array_info_dict, **hdf5_info_dict}
    elif hasattr(array, "store") and hasattr(array, "shape"):  # Duck typing for zarr array
        head = "Zarr Array"
        backend_info_dict = dict(array.info_items())
    else:
        # Unrecognized array-like: report the generic information instead of
        # failing on an unbound heading.
        head = type(array).__name__
        backend_info_dict = basic_array_info_dict

    repr_html = head + "<br>" + html_table(backend_info_dict)

    # Display the data for NumPy arrays and for datasets smaller than 10% of a kibibyte
    array_is_small = array_size_in_bytes < 1024 * 0.1
    if array_is_small or isinstance(array, np.ndarray):
        # Escape first, then turn the row breaks of multi-dimensional arrays
        # into HTML line breaks so the layout survives rendering.
        repr_html += "<br>" + to_html_text(np.asarray(array)).replace("\n", "<br>")

    return f'<div style="margin-left: {level * 20}px;" class="container-fields">{repr_html}</div>'

@staticmethod
def __smart_str(v, num_indent):
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/test_container.py
@@ -1,6 +1,7 @@
import numpy as np
from uuid import uuid4, UUID
import os
import h5py

from hdmf.container import AbstractContainer, Container, Data, HERDManager
from hdmf.common.resources import HERD
Expand Down Expand Up @@ -423,6 +424,23 @@ def __init__(self, **kwargs):
self.data = kwargs['data']
self.str = kwargs['str']

class ContainerWithData(Container):
    """Test container exposing a ``data`` field and a ``str`` field."""

    __fields__ = ("data", "str")

    @docval(
        {"name": "data", "doc": "data", "type": "array_data", "default": None},
        {"name": "str", "doc": "str", "type": str, "default": None},
    )
    def __init__(self, **kwargs):
        # The container name is fixed; only the two fields come from the caller.
        super().__init__("test name")
        data_value, str_value = kwargs["data"], kwargs["str"]
        self.data = data_value
        self.str = str_value

def test_repr_html_(self):
child_obj1 = Container('test child 1')
obj1 = self.ContainerWithChildAndData(child=child_obj1, data=[1, 2, 3], str="hello")
Expand Down Expand Up @@ -455,6 +473,39 @@ def test_repr_html_(self):
'class="field-value">hello</span></div></div>'
)

def test_repr_html_array(self):
    """A NumPy ``data`` field renders as an info table followed by the values."""
    values = np.array([1, 2, 3, 4], dtype=np.int64)
    container = self.ContainerWithData(data=values, str="hello")
    rendered_html = container._repr_html_()

    # Only the tail of the representation is pinned: the array block and the
    # trailing ``str`` field.
    expected_fragment = (
        'class="container-fields">NumPy Array<br><table class="data-info"><tbody><tr><th style="text-align: '
        'left">shape</th><td style="text-align: left">(4,)</td></tr><tr><th style="text-align: left">dtype</'
        'th><td style="text-align: left">int64</td></tr><tr><th style="text-align: left">Array size</th><td '
        'style="text-align: left">32.00 bytes</td></tr></tbody></table><br>[1 2 3 4]</div></details><div '
        'style="margin-left: 0px;" class="container-fields"><span class="field-key" title=".str">str: </'
        'span><span class="field-value">hello</span></div></div>'
    )
    self.assertIn(expected_fragment, rendered_html)

def test_repr_html_hdf5_dataset(self):
    """An HDF5-backed ``data`` field renders the HDF5 info table and the values."""
    path = 'data.h5'
    try:
        # Open an HDF5 file in write mode; the dataset is rendered while the
        # file is still open.
        with h5py.File(path, 'w') as file:
            dataset = file.create_dataset(name='my_dataset', data=np.array([1, 2, 3, 4], dtype=np.int64))
            obj = self.ContainerWithData(data=dataset, str="hello")
            expected_html_table = (
                'class="container-fields">HDF5 Dataset<br><table class="data-info"><tbody><tr><th style="text-align: '
                'left">shape</th><td style="text-align: left">(4,)</td></tr><tr><th style="text-align: left">dtype</'
                'th><td style="text-align: left">int64</td></tr><tr><th style="text-align: left">Array size</th><td '
                'style="text-align: left">32.00 bytes</td></tr><tr><th style="text-align: left">chunks</th><td '
                'style="text-align: left">None</td></tr><tr><th style="text-align: left">compression</th><td '
                'style="text-align: left">None</td></tr><tr><th style="text-align: left">compression_opts</th><td '
                'style="text-align: left">None</td></tr><tr><th style="text-align: left">compression_ratio</th><td '
                'style="text-align: left">1.0</td></tr></tbody></table><br>[1 2 3 4]</div></details><div '
                'style="margin-left: 0px;" class="container-fields"><span class="field-key" title=".str">str: </'
                'span><span class="field-value">hello</span></div></div>'
            )

            self.assertIn(expected_html_table, obj._repr_html_())
    finally:
        # Clean up even when the assertion fails, so a failing run does not
        # leave a stray file in the working directory.
        if os.path.exists(path):
            os.remove(path)

class TestData(TestCase):

Expand Down