Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Display both absolute frequencies and percentages #89

Merged
merged 12 commits on Mar 17, 2020
3 changes: 2 additions & 1 deletion CHANGELOG.rst
@@ -1,6 +1,7 @@
In development
--------------

- Added option to display both the absolute frequency and the percentage of
the total for each intersection and category. (:issue:`89`)
- Improved efficiency where there are many categories, but valid combinations
are sparse, if `sort_by='degree'`. (:issue:`82`)
- Permit truthy (not necessarily bool) values in index. (:issue:`74`)
Expand Down
4 changes: 4 additions & 0 deletions examples/plot_generated.py
Expand Up @@ -23,3 +23,7 @@
plot(example, show_counts='%d')
plt.suptitle('With counts shown')
plt.show()

plot(example, show_counts='%d', show_percentages=True)
plt.suptitle('With counts and % shown')
plt.show()
4 changes: 4 additions & 0 deletions examples/plot_vertical.py
Expand Up @@ -17,3 +17,7 @@
plot(example, orientation='vertical', show_counts='%d')
plt.suptitle('A vertical plot with counts shown')
plt.show()

plot(example, orientation='vertical', show_counts='%d', show_percentages=True)
plt.suptitle('With counts and percentages shown')
plt.show()
2 changes: 1 addition & 1 deletion upsetplot/__init__.py
@@ -1,4 +1,4 @@
__version__ = '0.4-dev'
__version__ = '0.4.dev1'

import os

Expand Down
52 changes: 43 additions & 9 deletions upsetplot/plotting.py
Expand Up @@ -272,6 +272,10 @@ class UpSet:
Whether to label the intersection size bars with the cardinality
of the intersection. When a string, this formats the number.
For example, '%d' is equivalent to True.
show_percentages : bool, default=False
Whether to label the intersection size bars with the percentage
of the intersection relative to the total dataset.
This may be applied with or without show_counts.
sort_sets_by
.. deprecated:: 0.3
Replaced by sort_categories_by, this parameter will be removed in
Expand All @@ -285,7 +289,8 @@ def __init__(self, data, orientation='horizontal', sort_by='degree',
facecolor='black',
with_lines=True, element_size=32,
intersection_plot_elements=6, totals_plot_elements=2,
show_counts='', sort_sets_by='deprecated'):
show_counts='', show_percentages=False,
sort_sets_by='deprecated'):

self._horizontal = orientation == 'horizontal'
self._reorient = _identity if self._horizontal else _transpose
Expand All @@ -299,6 +304,7 @@ def __init__(self, data, orientation='horizontal', sort_by='degree',
if not intersection_plot_elements:
self._subset_plots.pop()
self._show_counts = show_counts
self._show_percentages = show_percentages

if sort_sets_by != 'deprecated':
sort_categories_by = sort_sets_by
Expand Down Expand Up @@ -429,7 +435,6 @@ def make_grid(self, fig=None):
self._totals_plot_elements),
hspace=1)
if self._horizontal:
print(n_cats, n_inters, self._totals_plot_elements)
out = {'matrix': gridspec[-n_cats:, -n_inters:],
'shading': gridspec[-n_cats:, :],
'totals': gridspec[-n_cats:, :self._totals_plot_elements],
Expand All @@ -446,8 +451,8 @@ def make_grid(self, fig=None):
cumsizes = np.cumsum(sizes)
for start, stop, plot in zip(np.hstack([[0], cumsizes]), cumsizes,
self._subset_plots):
out[plot['id']] = gridspec[-n_inters:,
start + n_cats:stop + n_cats]
out[plot['id']] = \
gridspec[-n_inters:, start + n_cats:stop + n_cats]
return out

def plot_matrix(self, ax):
Expand Down Expand Up @@ -505,31 +510,60 @@ def plot_intersections(self, ax):
ax.set_ylabel('Intersection size')

def _label_sizes(self, ax, rects, where):
if not self._show_counts:
if not self._show_counts and not self._show_percentages:
return
fmt = '%d' if self._show_counts is True else self._show_counts
if self._show_counts is True:
count_fmt = "%d"
else:
count_fmt = self._show_counts
if self._show_percentages is True:
pct_fmt = "%.1f%%"
else:
pct_fmt = self._show_percentages

total = sum(self.totals)
if count_fmt and pct_fmt:
if where == 'top':
fmt = '%s\n(%s)' % (count_fmt, pct_fmt)
else:
fmt = '%s (%s)' % (count_fmt, pct_fmt)

def make_args(val):
return val, 100 * val / total
elif count_fmt:
fmt = count_fmt

def make_args(val):
return val,
else:
fmt = pct_fmt

def make_args(val):
return 100 * val / total,

if where == 'right':
margin = 0.01 * abs(np.diff(ax.get_xlim()))
for rect in rects:
width = rect.get_width()
ax.text(width + margin,
rect.get_y() + rect.get_height() * .5,
fmt % width,
fmt % make_args(width),
ha='left', va='center')
elif where == 'left':
margin = 0.01 * abs(np.diff(ax.get_xlim()))
for rect in rects:
width = rect.get_width()
ax.text(width + margin,
rect.get_y() + rect.get_height() * .5,
fmt % width,
fmt % make_args(width),
ha='right', va='center')
elif where == 'top':
margin = 0.01 * abs(np.diff(ax.get_ylim()))
for rect in rects:
height = rect.get_height()
ax.text(rect.get_x() + rect.get_width() * .5,
height + margin, fmt % height,
height + margin,
fmt % make_args(height),
ha='center', va='bottom')
else:
raise NotImplementedError('unhandled where: %r' % where)
Expand Down
37 changes: 32 additions & 5 deletions upsetplot/tests/test_upsetplot.py
Expand Up @@ -9,6 +9,7 @@
import numpy as np
import matplotlib.figure
import matplotlib.pyplot as plt
from matplotlib.text import Text

from upsetplot import plot
from upsetplot import UpSet
Expand All @@ -23,6 +24,11 @@ def is_ascending(seq):
return sorted(seq) == list(seq)


def get_all_texts(mpl_artist):
    """Return the non-empty strings of all Text objects found in mpl_artist.

    Uses ``mpl_artist.findobj(Text)`` to collect the Text artists, reads
    each one's ``get_text()`` and drops the empty results.
    """
    strings = (artist.get_text() for artist in mpl_artist.findobj(Text))
    return [string for string in strings if string]


@pytest.mark.parametrize('x', [
generate_counts(),
generate_counts().iloc[1:-2],
Expand Down Expand Up @@ -389,22 +395,43 @@ def _count_descendants(el):
@pytest.mark.parametrize('orientation', ['horizontal', 'vertical'])
def test_show_counts(orientation):
    """Check the count/percentage bar labels in both orientations.

    Each configuration is drawn on a fresh Figure so that artist counts and
    collected label texts are not contaminated by an earlier plot.
    """
    X = generate_counts(n_samples=10000)

    # Baseline: no labels at all.
    fig = matplotlib.figure.Figure()
    plot(X, fig, orientation=orientation)
    n_artists_no_sizes = _count_descendants(fig)

    # Counts with the default format.
    fig = matplotlib.figure.Figure()
    plot(X, fig, orientation=orientation, show_counts=True)
    n_artists_yes_sizes = _count_descendants(fig)
    assert n_artists_yes_sizes - n_artists_no_sizes > 6
    texts = get_all_texts(fig)
    assert '9547' in texts  # set size
    assert '283' in texts  # intersection size

    # Counts with a custom format string.
    fig = matplotlib.figure.Figure()
    plot(X, fig, orientation=orientation, show_counts='%0.2g')
    assert n_artists_yes_sizes == _count_descendants(fig)
    texts = get_all_texts(fig)
    assert '9.5e+03' in texts
    assert '2.8e+02' in texts

    # Percentages only.
    fig = matplotlib.figure.Figure()
    plot(X, fig, orientation=orientation, show_percentages=True)
    assert n_artists_yes_sizes == _count_descendants(fig)
    texts = get_all_texts(fig)
    assert '47.1%' in texts
    assert '1.4%' in texts

    # Counts and percentages together: labels above bars are split over two
    # lines, labels beside bars stay on one line.
    fig = matplotlib.figure.Figure()
    plot(X, fig, orientation=orientation, show_counts=True,
         show_percentages=True)
    assert n_artists_yes_sizes == _count_descendants(fig)
    texts = get_all_texts(fig)
    if orientation == 'vertical':
        assert '9547\n(47.1%)' in texts
        assert '283 (1.4%)' in texts
    else:
        assert '9547 (47.1%)' in texts
        assert '283\n(1.4%)' in texts

    # An invalid format string must surface as a ValueError.
    with pytest.raises(ValueError):
        fig = matplotlib.figure.Figure()
        plot(X, fig, orientation=orientation, show_counts='%0.2h')


def test_add_catplot():
Expand Down