From 2c3e541cb6414476cb33a5505c7c51af6ff0e546 Mon Sep 17 00:00:00 2001
From: Adnan Hemani <adnan.h@berkeley.edu>
Date: Sun, 5 Jun 2022 03:15:41 -0500
Subject: [PATCH 1/2] explicitly render histogram to prevent rounding errors

---
 datascience/tables.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/datascience/tables.py b/datascience/tables.py
index 6cfe1579..46d4622c 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -5439,6 +5439,7 @@ def draw_hist(values_dict):
                         axis.set_xlabel(x_unit, fontsize=16)
                     plt.legend(hist_names, loc=2, bbox_to_anchor=(1.05, 1))
                     type(self).plots.append(axis)
+                    plt.show()
                 else:
                     _, axes = plt.subplots(n, 1, figsize=(width, height * n))
                     if 'bins' in vargs:
@@ -5466,6 +5467,7 @@ def draw_hist(values_dict):
                             axis.scatter(values_for_hist, np.zeros_like(values_for_hist), marker="|",
                                          color="black", s=100, zorder=10)
                         type(self).plots.append(axis)
+                    plt.show()
 
         draw_hist(values_dict)
 

From 6e2990e41c8a4b82360d240c5deb0869d3df1dad Mon Sep 17 00:00:00 2001
From: Adnan Hemani <adnan.h@berkeley.edu>
Date: Mon, 6 Jun 2022 02:13:12 -0500
Subject: [PATCH 2/2] set legacy numpy printoptions at import and warn in draw_hist

---
 datascience/tables.py | 140 ++++++++++++++++++++++--------------------
 1 file changed, 73 insertions(+), 67 deletions(-)

diff --git a/datascience/tables.py b/datascience/tables.py
index 46d4622c..a97db7b1 100644
--- a/datascience/tables.py
+++ b/datascience/tables.py
@@ -32,6 +32,10 @@
 
 _INTERACTIVE_PLOTS = False
 
+# Set numpy printoptions to legacy to get around error terms, as described in
+# https://github.com/data-8/datascience/issues/491
+np.set_printoptions(legacy='1.13')
+
 class Table(collections.abc.MutableMapping):
     """A sequence of string-labeled columns."""
     plots = collections.deque(maxlen=10)
@@ -5393,81 +5397,83 @@ def prepare_hist_with_group(group):
                     right_end = max([max(self.column(k)) for k in self.labels if np.issubdtype(self.column(k).dtype, np.number)])
 
         def draw_hist(values_dict):
-            with np.printoptions(legacy='1.13'):
-                # This code is factored as a function for clarity only.
-                n = len(values_dict)
-                colors = [rgb_color + (self.default_alpha,) for rgb_color in
-                    itertools.islice(itertools.cycle(self.chart_colors), n)]
-                hist_names = list(values_dict.keys())
-                values = [v[0] for v in values_dict.values()]
-                weights = [v[1] for v in values_dict.values() if len(v) > 1]
-                if n > len(weights) > 0:
-                    raise ValueError("Weights were provided for some columns, but not "
-                                     " all, and that's not supported.")
-                if rug and overlay and n > 1:
-                    warnings.warn("Cannot plot overlaid rug plots; rug=True ignored", UserWarning)
+            # Issue a UserWarning if numpy's print options are not set to the '1.13' legacy mode.
+            if np.get_printoptions()['legacy'] != '1.13':
+                warnings.warn("We've detected you're not using the '1.13' legacy setting for `np.printoptions`. "
+                    "This may cause excessive error terms in your plots. We recommend solving this by running the "
+                    "following code: `np.set_printoptions(legacy='1.13')`", UserWarning)
+            # This code is factored as a function for clarity only.
+            n = len(values_dict)
+            colors = [rgb_color + (self.default_alpha,) for rgb_color in
+                itertools.islice(itertools.cycle(self.chart_colors), n)]
+            hist_names = list(values_dict.keys())
+            values = [v[0] for v in values_dict.values()]
+            weights = [v[1] for v in values_dict.values() if len(v) > 1]
+            if n > len(weights) > 0:
+                raise ValueError("Weights were provided for some columns, but not "
+                                 " all, and that's not supported.")
+            if rug and overlay and n > 1:
+                warnings.warn("Cannot plot overlaid rug plots; rug=True ignored", UserWarning)
+            if vargs['density']:
+                y_label = 'Percent per ' + (unit if unit else 'unit')
+                percentage = plt.FuncFormatter(lambda x, _: "{:g}".format(100*x))
+            else:
+                y_label = 'Count'
+
+            if overlay and n > 1:
+                # Reverse because legend prints bottom-to-top
+                values = values[::-1]
+                weights = weights[::-1]
+                colors = list(colors)[::-1]
+                if len(weights) == n:
+                    vargs['weights'] = weights
+                if not side_by_side:
+                    vargs.setdefault('histtype', 'stepfilled')
+                figure = plt.figure(figsize=(width, height))
+                plt.hist(values, color=colors, **vargs)
+                # if rug:
+                #     plt.scatter(values, np.zeros_like(values), marker="|", color=colors)
+                axis = figure.get_axes()[0]
+                _vertical_x(axis)
+                axis.set_ylabel(y_label)
                 if vargs['density']:
-                    y_label = 'Percent per ' + (unit if unit else 'unit')
-                    percentage = plt.FuncFormatter(lambda x, _: "{:g}".format(100*x))
+                    axis.yaxis.set_major_formatter(percentage)
+                x_unit = ' (' + unit + ')' if unit else ''
+                if group is not None and len(self.labels) == 2:
+                    #There's a grouping in place but we're only plotting one column's values
+                    label_not_grouped = [l for l in self.labels if l != group][0]
+                    axis.set_xlabel(label_not_grouped + x_unit, fontsize=16)
                 else:
-                    y_label = 'Count'
-
-                if overlay and n > 1:
-                    # Reverse because legend prints bottom-to-top
-                    values = values[::-1]
-                    weights = weights[::-1]
-                    colors = list(colors)[::-1]
-                    if len(weights) == n:
-                        vargs['weights'] = weights
-                    if not side_by_side:
+                    axis.set_xlabel(x_unit, fontsize=16)
+                plt.legend(hist_names, loc=2, bbox_to_anchor=(1.05, 1))
+                type(self).plots.append(axis)
+            else:
+                _, axes = plt.subplots(n, 1, figsize=(width, height * n))
+                if 'bins' in vargs:
+                    bins = vargs['bins']
+                    if isinstance(bins, numbers.Integral) and bins > 76 or hasattr(bins, '__len__') and len(bins) > 76:
+                        # Use stepfilled when there are too many bins
                         vargs.setdefault('histtype', 'stepfilled')
-                    figure = plt.figure(figsize=(width, height))
-                    plt.hist(values, color=colors, **vargs)
-                    # if rug:
-                    #     plt.scatter(values, np.zeros_like(values), marker="|", color=colors)
-                    axis = figure.get_axes()[0]
-                    _vertical_x(axis)
+                if n == 1:
+                    axes = [axes]
+                for i, (axis, hist_name, values_for_hist, color) in enumerate(zip(axes, hist_names, values, colors)):
                     axis.set_ylabel(y_label)
                     if vargs['density']:
                         axis.yaxis.set_major_formatter(percentage)
                     x_unit = ' (' + unit + ')' if unit else ''
-                    if group is not None and len(self.labels) == 2:
-                        #There's a grouping in place but we're only plotting one column's values
-                        label_not_grouped = [l for l in self.labels if l != group][0]
-                        axis.set_xlabel(label_not_grouped + x_unit, fontsize=16)
-                    else:
-                        axis.set_xlabel(x_unit, fontsize=16)
-                    plt.legend(hist_names, loc=2, bbox_to_anchor=(1.05, 1))
+                    if len(weights) == n:
+                        vargs['weights'] = weights[i]
+                    axis.set_xlabel(hist_name + x_unit, fontsize=16)
+                    heights, bins, patches = axis.hist(values_for_hist, color=color, **vargs)
+                    if left_end is not None and right_end is not None:
+                        x_shade, height_shade, width_shade = _compute_shading(heights, bins.copy(), left_end, right_end)
+                        axis.bar(x_shade, height_shade, width=width_shade,
+                                 color=self.chart_colors[1], align="edge")
+                    _vertical_x(axis)
+                    if rug:
+                        axis.scatter(values_for_hist, np.zeros_like(values_for_hist), marker="|",
+                                     color="black", s=100, zorder=10)
                     type(self).plots.append(axis)
-                    plt.show()
-                else:
-                    _, axes = plt.subplots(n, 1, figsize=(width, height * n))
-                    if 'bins' in vargs:
-                        bins = vargs['bins']
-                        if isinstance(bins, numbers.Integral) and bins > 76 or hasattr(bins, '__len__') and len(bins) > 76:
-                            # Use stepfilled when there are too many bins
-                            vargs.setdefault('histtype', 'stepfilled')
-                    if n == 1:
-                        axes = [axes]
-                    for i, (axis, hist_name, values_for_hist, color) in enumerate(zip(axes, hist_names, values, colors)):
-                        axis.set_ylabel(y_label)
-                        if vargs['density']:
-                            axis.yaxis.set_major_formatter(percentage)
-                        x_unit = ' (' + unit + ')' if unit else ''
-                        if len(weights) == n:
-                            vargs['weights'] = weights[i]
-                        axis.set_xlabel(hist_name + x_unit, fontsize=16)
-                        heights, bins, patches = axis.hist(values_for_hist, color=color, **vargs)
-                        if left_end is not None and right_end is not None:
-                            x_shade, height_shade, width_shade = _compute_shading(heights, bins.copy(), left_end, right_end)
-                            axis.bar(x_shade, height_shade, width=width_shade,
-                                     color=self.chart_colors[1], align="edge")
-                        _vertical_x(axis)
-                        if rug:
-                            axis.scatter(values_for_hist, np.zeros_like(values_for_hist), marker="|",
-                                         color="black", s=100, zorder=10)
-                        type(self).plots.append(axis)
-                    plt.show()
 
         draw_hist(values_dict)