Skip to content

Commit

Permalink
merged with master
Browse files Browse the repository at this point in the history
  • Loading branch information
adnanhemani committed Jun 20, 2019
2 parents f1cd7a0 + b65bafa commit dfe6461
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 60 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ This project adheres to [Semantic Versioning](http://semver.org/).
### v0.11.8
* Fixes bug where the x-label doesn't show up for grouped histograms under certain conditions.

### v0.11.7
* Fixes bug where Table#hist sometimes truncated the x-axis label.

### v0.11.6
* Fixes bug where error terms show up while plotting.

### v0.11.5
* Fixes bug where joining tables that have columns that are already duplicated will sometimes join incorrectly.

Expand Down
112 changes: 57 additions & 55 deletions datascience/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2586,7 +2586,8 @@ def prepare_hist_with_bin_column(bin_column):
# This code is factored as a function for clarity only.
weight_columns = [c for c in self.labels if c != bin_column]
bin_values = self.column(bin_column)
values_dict = [(w.rstrip(' count'), (bin_values, self.column(w))) for w in weight_columns]
values_dict = [(w[:-6] if w.endswith(' count') else w, (bin_values, self.column(w))) \
for w in weight_columns]
return values_dict

def prepare_hist_with_group(group):
Expand All @@ -2610,67 +2611,68 @@ def prepare_hist_with_group(group):
values_dict = collections.OrderedDict(values_dict)

def draw_hist(values_dict):
# This code is factored as a function for clarity only.
n = len(values_dict)
colors = [rgb_color + (self.default_alpha,) for rgb_color in
itertools.islice(itertools.cycle(self.chart_colors), n)]
hist_names = list(values_dict.keys())
values = [v[0] for v in values_dict.values()]
weights = [v[1] for v in values_dict.values() if len(v) > 1]
if n > len(weights) > 0:
raise ValueError("Weights were provided for some columns, but not "
" all, and that's not supported.")
if vargs['density']:
y_label = 'Percent per ' + (unit if unit else 'unit')
percentage = plt.FuncFormatter(lambda x, _: "{:g}".format(100*x))
else:
y_label = 'Count'

if overlay and n > 1:
# Reverse because legend prints bottom-to-top
values = values[::-1]
weights = weights[::-1]
colors = list(colors)[::-1]
if len(weights) == n:
vargs['weights'] = weights
if not side_by_side:
vargs.setdefault('histtype', 'stepfilled')
figure = plt.figure(figsize=(width, height))
plt.hist(values, color=colors, **vargs)
axis = figure.get_axes()[0]
_vertical_x(axis)
axis.set_ylabel(y_label)
with np.printoptions(legacy='1.13'):
# This code is factored as a function for clarity only.
n = len(values_dict)
colors = [rgb_color + (self.default_alpha,) for rgb_color in
itertools.islice(itertools.cycle(self.chart_colors), n)]
hist_names = list(values_dict.keys())
values = [v[0] for v in values_dict.values()]
weights = [v[1] for v in values_dict.values() if len(v) > 1]
if n > len(weights) > 0:
raise ValueError("Weights were provided for some columns, but not "
" all, and that's not supported.")
if vargs['density']:
axis.yaxis.set_major_formatter(percentage)
x_unit = ' (' + unit + ')' if unit else ''
if group is not None and len(self.labels) == 2:
#There's a grouping in place but we're only plotting one column's values
label_not_grouped = [l for l in self.labels if l != group][0]
axis.set_xlabel(label_not_grouped + x_unit, fontsize=16)
else:
axis.set_xlabel(x_unit, fontsize=16)
plt.legend(hist_names, loc=2, bbox_to_anchor=(1.05, 1))
type(self).plots.append(axis)
else:
_, axes = plt.subplots(n, 1, figsize=(width, height * n))
if 'bins' in vargs:
bins = vargs['bins']
if isinstance(bins, numbers.Integral) and bins > 76 or hasattr(bins, '__len__') and len(bins) > 76:
# Use stepfilled when there are too many bins
y_label = 'Percent per ' + (unit if unit else 'unit')
percentage = plt.FuncFormatter(lambda x, _: "{:g}".format(100*x))
else:
y_label = 'Count'

if overlay and n > 1:
# Reverse because legend prints bottom-to-top
values = values[::-1]
weights = weights[::-1]
colors = list(colors)[::-1]
if len(weights) == n:
vargs['weights'] = weights
if not side_by_side:
vargs.setdefault('histtype', 'stepfilled')
if n == 1:
axes = [axes]
for i, (axis, hist_name, values_for_hist, color) in enumerate(zip(axes, hist_names, values, colors)):
figure = plt.figure(figsize=(width, height))
plt.hist(values, color=colors, **vargs)
axis = figure.get_axes()[0]
_vertical_x(axis)
axis.set_ylabel(y_label)
if vargs['density']:
axis.yaxis.set_major_formatter(percentage)
x_unit = ' (' + unit + ')' if unit else ''
if len(weights) == n:
vargs['weights'] = weights[i]
axis.set_xlabel(hist_name + x_unit, fontsize=16)
axis.hist(values_for_hist, color=color, **vargs)
_vertical_x(axis)
if group is not None and len(self.labels) == 2:
#There's a grouping in place but we're only plotting one column's values
label_not_grouped = [l for l in self.labels if l != group][0]
axis.set_xlabel(label_not_grouped + x_unit, fontsize=16)
else:
axis.set_xlabel(x_unit, fontsize=16)
plt.legend(hist_names, loc=2, bbox_to_anchor=(1.05, 1))
type(self).plots.append(axis)
else:
_, axes = plt.subplots(n, 1, figsize=(width, height * n))
if 'bins' in vargs:
bins = vargs['bins']
if isinstance(bins, numbers.Integral) and bins > 76 or hasattr(bins, '__len__') and len(bins) > 76:
# Use stepfilled when there are too many bins
vargs.setdefault('histtype', 'stepfilled')
if n == 1:
axes = [axes]
for i, (axis, hist_name, values_for_hist, color) in enumerate(zip(axes, hist_names, values, colors)):
axis.set_ylabel(y_label)
if vargs['density']:
axis.yaxis.set_major_formatter(percentage)
x_unit = ' (' + unit + ')' if unit else ''
if len(weights) == n:
vargs['weights'] = weights[i]
axis.set_xlabel(hist_name + x_unit, fontsize=16)
axis.hist(values_for_hist, color=color, **vargs)
_vertical_x(axis)
type(self).plots.append(axis)

draw_hist(values_dict)

Expand Down

0 comments on commit dfe6461

Please sign in to comment.