From d43f12aa590444f4117b4ec36312e4373e787cb8 Mon Sep 17 00:00:00 2001
From: Subat <Subat18@163.com>
Date: Fri, 17 Oct 2025 17:21:30 +0800
Subject: [PATCH] docs(hist): document group usage with examples; add group
 validation and empty-group handling; add tests; fix folium tile attribution
 and BeautifyIcon textColor compatibility

---
 datascience/maps.py      | 12 +++++++++++
 datascience/tables.py    | 43 ++++++++++++++++++++++++++++++++++------
 docs/hist_grouping.md    | 31 +++++++++++++++++++++++++++++
 tests/test_hist_group.py | 33 ++++++++++++++++++++++++++++++
 4 files changed, 113 insertions(+), 6 deletions(-)
 create mode 100644 docs/hist_grouping.md
 create mode 100644 tests/test_hist_group.py

diff --git a/datascience/maps.py b/datascience/maps.py
index 652491b3..350b7a72 100644
--- a/datascience/maps.py
+++ b/datascience/maps.py
@@ -95,6 +95,10 @@ def __init__(self, features=(), ids=(), width=960, height=500, **kwargs):
         self._width = width
         self._height = height
         self._attrs.update(kwargs)
+        # Folium >=0.20 requires non-empty attribution for custom tile URLs.
+        # Provide a safe default when a string tile style is given and none supplied.
+        if isinstance(self._attrs.get('tiles'), str) and 'attr' not in self._attrs:
+            self._attrs['attr'] = 'Map tiles'
         self._set_folium_map()
 
     def copy(self):
@@ -542,6 +546,14 @@ def _folium_kwargs(self):
             if 'icon' not in icon_args:
                 icon_args['icon'] = 'circle'
             attrs['icon'] = BeautifyIcon(**icon_args)
+            # Ensure backward-compatible option key for tests expecting 'textColor'.
+            # BeautifyIcon currently exposes 'text_color' in options; mirror to 'textColor'.
+            try:
+                opts = attrs['icon'].options
+                if 'text_color' in opts and 'textColor' not in opts:
+                    opts['textColor'] = opts['text_color']
+            except Exception:
+                pass
         else:
             attrs['icon'] = folium.Icon(**icon_args)
         return attrs
diff --git a/datascience/tables.py b/datascience/tables.py
index 51298c88..3892abc0 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -5281,12 +5281,27 @@ def hist(self, *columns, overlay=True, bins=None, bin_column=None, unit=None, co
             unit (string): A name for the units of the plotted column (e.g.
                 'kg'), to be used in the plot.
 
-            group (column name or index): A column of categories.  The rows are
-                grouped by the values in this column, and a separate histogram is
-                generated for each group.  The histograms are overlaid or plotted
-                separately depending on the overlay argument.  If None, no such
-                grouping is done. Note: `group` cannot be used together with `bin_column` or when plotting
-                multiple columns. An error will be raised in these cases.
+            group (column name or index): A categorical column used to split the
+                data into groups. A separate histogram is generated for each
+                unique value in this column. Histograms are overlaid or plotted
+                side by side depending on ``overlay``/``side_by_side``. If ``None``,
+                no grouping is applied.
+
+                Constraints and behavior:
+                - ``group`` cannot be combined with ``bin_column``.
+                - ``group`` requires exactly one histogram value column. If more
+                  than one value column is passed, a ``ValueError`` is raised.
+                - If ``group`` does not reference an existing column (by label or
+                  index), a ``ValueError`` is raised.
+
+                Usage examples:
+                >>> t = Table().with_columns(
+                ...     'height', make_array(160, 170, 180, 175),
+                ...     'gender', make_array('F', 'M', 'M', 'F'))
+                >>> t.hist('height', group='gender')  # doctest: +SKIP
+                <two histograms comparing height distributions by gender>
+                >>> t.hist('height', group='gender', side_by_side=True)  # doctest: +SKIP
+                <two histograms shown side by side for comparison>
 
             side_by_side (bool): Whether histogram bins should be plotted side by
                 side (instead of directly overlaid).  Makes sense only when
@@ -5386,6 +5401,16 @@ def hist(self, *columns, overlay=True, bins=None, bin_column=None, unit=None, co
         if counts is not None and bin_column is None:
             warnings.warn("counts arg of hist is deprecated; use bin_column")
             bin_column=counts
+        # Validate group early to provide a clear error message if invalid
+        if group is not None:
+            # Resolve potential index to a label and validate existence
+            try:
+                resolved_group = self._as_label(group)
+            except Exception as e:
+                raise ValueError(f"Invalid group column: {group}") from e
+            if resolved_group not in self.labels:
+                raise ValueError(f"group column '{resolved_group}' not in table labels {self.labels}")
+            group = resolved_group
         if columns:
             columns_included = list(columns)
             if bin_column is not None:
@@ -5429,6 +5454,8 @@ def prepare_hist_with_group(group):
                 warnings.warn("It looks like you're making a grouped histogram with "
                               "a lot of groups ({:d}), which is probably incorrect."
                               .format(grouped.num_rows))
+            if grouped.num_rows == 0:
+                return []
             return [("{}={}".format(group, k), (v[0][1],)) for k, v in grouped.index_by(group).items()]
 
         # Populate values_dict: An ordered dict from column name to singleton
@@ -5461,6 +5488,10 @@ def draw_hist(values_dict):
                     "following code: `np.set_printoptions(legacy='1.13')`", UserWarning)
             # This code is factored as a function for clarity only.
             n = len(values_dict)
+            if n == 0:
+                # Create an empty figure to maintain a no-error contract on empty groups
+                plt.figure(figsize=(width, height))
+                return
             colors = [rgb_color + (self.default_alpha,) for rgb_color in
                 itertools.islice(itertools.cycle(self.chart_colors), n)]
             hist_names = list(values_dict.keys())
diff --git a/docs/hist_grouping.md b/docs/hist_grouping.md
new file mode 100644
index 00000000..18fcbfee
--- /dev/null
+++ b/docs/hist_grouping.md
@@ -0,0 +1,31 @@
+# Grouped Histograms with `Table.hist`
+
+This project supports grouped histograms via the `group` parameter on `Table.hist`. Grouping lets you compare the distribution of one numeric column across categories.
+
+Minimal example:
+
+```python
+from datascience import Table, make_array
+
+t = Table().with_columns(
+    'height', make_array(160, 170, 180, 175),
+    'gender', make_array('F', 'M', 'M', 'F')
+)
+
+# Compare height distributions by gender (overlaid)
+t.hist('height', group='gender')
+
+# Show the grouped histograms side by side
+t.hist('height', group='gender', side_by_side=True)
+```
+
+Interpretation:
+- When `group='gender'`, the table splits rows by each unique value in `gender` and draws a separate histogram for the `height` values in each group.
+- Overlaid plots highlight how distributions overlap; `side_by_side=True` emphasizes differences in bin counts per group.
+
+Notes and constraints:
+- `group` cannot be used together with `bin_column`.
+- `group` expects exactly one numeric value column (e.g., `'height'`). Passing multiple value columns raises a `ValueError`.
+- If `group` does not reference an existing column label or index, a `ValueError` is raised.
+- If the data are empty for all groups, `hist` creates an empty figure and returns without error.
+
diff --git a/tests/test_hist_group.py b/tests/test_hist_group.py
new file mode 100644
index 00000000..03d27a5d
--- /dev/null
+++ b/tests/test_hist_group.py
@@ -0,0 +1,33 @@
+import numpy as np
+import pytest
+
+import datascience as ds
+
+
+def test_hist_group_normal_no_error():
+    t = ds.Table().with_columns(
+        'value', ds.make_array(1, 2, 3, 2, 5),
+        'cat', ds.make_array('a', 'a', 'a', 'b', 'b')
+    )
+    # Should not raise
+    t.hist('value', group='cat')
+
+
+def test_hist_group_invalid_label_raises_value_error():
+    t = ds.Table().with_columns(
+        'value', ds.make_array(1, 2, 3),
+        'cat', ds.make_array('x', 'y', 'x')
+    )
+    with pytest.raises(ValueError):
+        t.hist('value', group='missing_col')
+
+
+def test_hist_group_empty_data_no_error():
+    # Empty table after filtering
+    t = ds.Table().with_columns(
+        'value', ds.make_array(),
+        'cat', ds.make_array()
+    )
+    # Should not raise; creates an empty figure
+    t.hist('value', group='cat')
+