Skip to content

Commit

Permalink
Merge pull request #381 from data-8/hist_labels
Browse files Browse the repository at this point in the history
Hist labels
  • Loading branch information
davidwagner committed Jun 20, 2019
2 parents 029ea37 + 10458ff commit 61a301a
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 147 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ pip install datascience

This project adheres to [Semantic Versioning](http://semver.org/).

### v0.11.6
* Fixes bug where error terms show up while plotting.

### v0.11.5
* Fixes bug where joining tables that have columns that are already duplicated will sometimes join incorrectly.
Expand Down
101 changes: 51 additions & 50 deletions datascience/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2610,62 +2610,63 @@ def prepare_hist_with_group(group):
values_dict = collections.OrderedDict(values_dict)

def draw_hist(values_dict):
# This code is factored as a function for clarity only.
n = len(values_dict)
colors = [rgb_color + (self.default_alpha,) for rgb_color in
itertools.islice(itertools.cycle(self.chart_colors), n)]
hist_names = list(values_dict.keys())
values = [v[0] for v in values_dict.values()]
weights = [v[1] for v in values_dict.values() if len(v) > 1]
if n > len(weights) > 0:
raise ValueError("Weights were provided for some columns, but not "
" all, and that's not supported.")
if vargs['density']:
y_label = 'Percent per ' + (unit if unit else 'unit')
percentage = plt.FuncFormatter(lambda x, _: "{:g}".format(100*x))
else:
y_label = 'Count'

if overlay and n > 1:
# Reverse because legend prints bottom-to-top
values = values[::-1]
weights = weights[::-1]
colors = list(colors)[::-1]
if len(weights) == n:
vargs['weights'] = weights
if not side_by_side:
vargs.setdefault('histtype', 'stepfilled')
figure = plt.figure(figsize=(width, height))
plt.hist(values, color=colors, **vargs)
axis = figure.get_axes()[0]
_vertical_x(axis)
axis.set_ylabel(y_label)
with np.printoptions(legacy='1.13'):
# This code is factored as a function for clarity only.
n = len(values_dict)
colors = [rgb_color + (self.default_alpha,) for rgb_color in
itertools.islice(itertools.cycle(self.chart_colors), n)]
hist_names = list(values_dict.keys())
values = [v[0] for v in values_dict.values()]
weights = [v[1] for v in values_dict.values() if len(v) > 1]
if n > len(weights) > 0:
raise ValueError("Weights were provided for some columns, but not "
" all, and that's not supported.")
if vargs['density']:
axis.yaxis.set_major_formatter(percentage)
if unit:
axis.set_xlabel('(' + unit + ')', fontsize=16)
plt.legend(hist_names, loc=2, bbox_to_anchor=(1.05, 1))
type(self).plots.append(axis)
else:
_, axes = plt.subplots(n, 1, figsize=(width, height * n))
if 'bins' in vargs:
bins = vargs['bins']
if isinstance(bins, numbers.Integral) and bins > 76 or hasattr(bins, '__len__') and len(bins) > 76:
# Use stepfilled when there are too many bins
y_label = 'Percent per ' + (unit if unit else 'unit')
percentage = plt.FuncFormatter(lambda x, _: "{:g}".format(100*x))
else:
y_label = 'Count'

if overlay and n > 1:
# Reverse because legend prints bottom-to-top
values = values[::-1]
weights = weights[::-1]
colors = list(colors)[::-1]
if len(weights) == n:
vargs['weights'] = weights
if not side_by_side:
vargs.setdefault('histtype', 'stepfilled')
if n == 1:
axes = [axes]
for i, (axis, hist_name, values_for_hist, color) in enumerate(zip(axes, hist_names, values, colors)):
figure = plt.figure(figsize=(width, height))
plt.hist(values, color=colors, **vargs)
axis = figure.get_axes()[0]
_vertical_x(axis)
axis.set_ylabel(y_label)
if vargs['density']:
axis.yaxis.set_major_formatter(percentage)
x_unit = ' (' + unit + ')' if unit else ''
if len(weights) == n:
vargs['weights'] = weights[i]
axis.set_xlabel(hist_name + x_unit, fontsize=16)
axis.hist(values_for_hist, color=color, **vargs)
_vertical_x(axis)
if unit:
axis.set_xlabel('(' + unit + ')', fontsize=16)
plt.legend(hist_names, loc=2, bbox_to_anchor=(1.05, 1))
type(self).plots.append(axis)
else:
_, axes = plt.subplots(n, 1, figsize=(width, height * n))
if 'bins' in vargs:
bins = vargs['bins']
if isinstance(bins, numbers.Integral) and bins > 76 or hasattr(bins, '__len__') and len(bins) > 76:
# Use stepfilled when there are too many bins
vargs.setdefault('histtype', 'stepfilled')
if n == 1:
axes = [axes]
for i, (axis, hist_name, values_for_hist, color) in enumerate(zip(axes, hist_names, values, colors)):
axis.set_ylabel(y_label)
if vargs['density']:
axis.yaxis.set_major_formatter(percentage)
x_unit = ' (' + unit + ')' if unit else ''
if len(weights) == n:
vargs['weights'] = weights[i]
axis.set_xlabel(hist_name + x_unit, fontsize=16)
axis.hist(values_for_hist, color=color, **vargs)
_vertical_x(axis)
type(self).plots.append(axis)

draw_hist(values_dict)

Expand Down
3 changes: 2 additions & 1 deletion datascience/version.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
__version__ = '0.11.5'
__version__ = '0.11.6'

226 changes: 130 additions & 96 deletions tests/Charts.ipynb

Large diffs are not rendered by default.

0 comments on commit 61a301a

Please sign in to comment.