Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge pull request #740 from jrrideout/issue_716

Added support for plotting empty distributions in box plots.
  • Loading branch information...
commit 14544ea43ad09aa121f65e1f77bd64a3fc428935 2 parents a88d75c + a2c6efa
@gregcaporaso gregcaporaso authored
View
96 qiime/pycogent_backports/distribution_plots.py
@@ -75,8 +75,6 @@ def generate_box_plots(distributions, x_values=None, x_tick_labels=None,
"""
# Make sure our input makes sense.
for distribution in distributions:
- if len(distribution) == 0:
- raise ValueError("Some of the provided distributions are empty.")
try:
map(float, distribution)
except:
@@ -91,6 +89,21 @@ def generate_box_plots(distributions, x_values=None, x_tick_labels=None,
widths=box_width)
if box_colors is not None:
+ if _is_single_matplotlib_color(box_colors):
+ box_colors = [box_colors] * len(box_plot['boxes'])
+ else:
+ # We check against the number of input distributions because mpl
+ # will only return non-empty boxplots from the boxplot() call
+ # above.
+ if len(box_colors) != len(distributions):
+ raise ValueError("Not enough colors were supplied to color "
+ "each boxplot.")
+
+ # Filter out colors corresponding to empty distributions.
+ box_colors = [color for distribution, color in zip(distributions,
+ box_colors)
+ if distribution]
+
_color_box_plot(plot_axes, box_plot, box_colors)
# Set up the various plotting options, such as x- and y-axis labels, plot
@@ -203,8 +216,6 @@ def generate_comparative_plots(plot_type, data, x_values=None,
assert (len(x_locations) == num_points), "The number of x_locations " +\
"does not match the number of data points."
- # Create the figure to put the plots on, as well as a list to store an
- # example of each distribution's plot (needed for the legend).
result, plot_axes = _create_plot()
# Iterate over each data point, and plot each of the distributions at that
@@ -430,65 +441,58 @@ def _plot_box_data(plot_axes, distribution, distribution_color,
distribution_width, x_position, whisker_length,
error_bar_type):
"""Returns the result of plotting a single boxplot in matplotlib."""
- box_plot = plot_axes.boxplot([distribution], positions=[x_position],
- widths=distribution_width,
- whis=whisker_length)
- _color_box_plot(plot_axes, box_plot, distribution_color)
- return box_plot
+ result = None
+
+ if len(distribution) > 0:
+ result = plot_axes.boxplot([distribution], positions=[x_position],
+ widths=distribution_width,
+ whis=whisker_length)
+ _color_box_plot(plot_axes, result, [distribution_color])
+
+ return result
def _color_box_plot(plot_axes, box_plot, colors):
- """Fill each box in the box plot with the specified color or colors.
+ """Color boxes in the box plot with the specified colors.
- If colors is a single string or tuple, all boxes are colored with it. If a
- list or tuple of colors is provided, each box will be colored with its
- corresponding color. If any of the colors are None, the box will not be
- colored (the default will be used).
+ If any of the colors are None, the box will not be colored.
The box_plot argument must be the dictionary returned by the call to
- matplotlib's boxplot function, and the color argument must be a valid
- matplotlib color.
+ matplotlib's boxplot function, and the colors argument must consist of
+ valid matplotlib colors.
"""
- # Note: the following code is largely taken from a matplotlib boxplot
+ # Note: the following code is largely taken from this matplotlib boxplot
# example:
# http://matplotlib.sourceforge.net/examples/pylab_examples/
# boxplot_demo2.html
- num_boxes = len(box_plot['boxes'])
-
- if _is_single_matplotlib_color(colors):
- box_colors = [colors] * num_boxes
- else:
- if len(colors) != num_boxes:
- raise ValueError("Not enough colors were supplied to color each "
- "boxplot.")
- box_colors = colors
-
- for box_num in range(num_boxes):
- box_color = box_colors[box_num]
+ if len(colors) != len(box_plot['boxes']):
+ raise ValueError("Not enough colors were supplied to color each "
+ "boxplot.")
- if box_color is not None:
- box = box_plot['boxes'][box_num]
- boxX = []
- boxY = []
+ for box, median, color in zip(box_plot['boxes'],
+ box_plot['medians'],
+ colors):
+ if color is not None:
+ box_x = []
+ box_y = []
# There are five points in the box. The first is the same as
# the last.
- for j in range(5):
- boxX.append(box.get_xdata()[j])
- boxY.append(box.get_ydata()[j])
+ for i in range(5):
+ box_x.append(box.get_xdata()[i])
+ box_y.append(box.get_ydata()[i])
- boxCoords = zip(boxX,boxY)
- boxPolygon = Polygon(boxCoords, facecolor=box_color)
- plot_axes.add_patch(boxPolygon)
+ box_coords = zip(box_x, box_y)
+ box_polygon = Polygon(box_coords, facecolor=color)
+ plot_axes.add_patch(box_polygon)
# Draw the median lines back over what we just filled in with
# color.
- median = box_plot['medians'][box_num]
- medianX = []
- medianY = []
- for j in range(2):
- medianX.append(median.get_xdata()[j])
- medianY.append(median.get_ydata()[j])
- plot_axes.plot(medianX, medianY, 'black')
+ median_x = []
+ median_y = []
+ for i in range(2):
+ median_x.append(median.get_xdata()[i])
+ median_y.append(median.get_ydata()[i])
+ plot_axes.plot(median_x, median_y, 'black')
def _is_single_matplotlib_color(color):
"""Returns True if color is a single (not a list) mpl color."""
View
10 qiime/stats.py
@@ -91,11 +91,11 @@ def all_pairs_t_test(labels, dists, tail_type='two-sided',
result += '# Entries marked with "N/A" could not be calculated because ' + \
'at least one of the groups\n# of distances was empty, ' + \
'both groups each contained only a single distance, or\n' + \
- '# the test could not be performed (e.g. no variance in the ' + \
- 'groups).\nGroup 1\tGroup 2\tt statistic\tParametric ' + \
- 'p-value\tParametric p-value (Bonferroni-corrected)\t' + \
- 'Nonparametric p-value\tNonparametric p-value ' + \
- '(Bonferroni-corrected)\n'
+ '# the test could not be performed (e.g. no variance in ' + \
+ 'groups with the same mean).\nGroup 1\tGroup 2\t' + \
+ 't statistic\tParametric p-value\tParametric p-value ' + \
+ '(Bonferroni-corrected)\tNonparametric p-value\t' + \
+ 'Nonparametric p-value (Bonferroni-corrected)\n'
stats = _perform_pairwise_tests(labels, dists, tail_type, num_permutations)
for stat in stats:
View
72 tests/test_pycogent_backports/test_distribution_plots.py
@@ -255,16 +255,10 @@ def test_plot_box_data(self):
self.assertEqual(len(result['caps']), 2)
def test_plot_box_data_empty(self):
- """_plot_box_data() should not error when given empty list of data,
- but should not plot anything."""
+ """Should ignore empty distribution."""
fig, ax = _create_plot()
result = _plot_box_data(ax, [], 'blue', 0.33, 55, 1.5, 'stdv')
- self.assertEqual(result.__class__.__name__, "dict")
- self.assertEqual(len(result['boxes']), 0)
- self.assertEqual(len(result['medians']), 0)
- self.assertEqual(len(result['whiskers']), 0)
- self.assertEqual(len(result['fliers']), 0)
- self.assertEqual(len(result['caps']), 0)
+ self.assertTrue(result is None)
def test_calc_data_point_locations_invalid_widths(self):
"""_calc_data_point_locations() should raise a ValueError
@@ -358,14 +352,56 @@ def test_generate_box_plots(self):
self.assertEqual(len(ax.get_xticklabels()), 3)
self.assertFloatEqual(ax.get_xticks(), [1, 4, 10])
+ def test_generate_box_plots_empty_distributions(self):
+ """Test functions correctly with empty distributions."""
+ fig = generate_box_plots([[1, 2, 3], [], [4, 5, 6]], [1, 4, 10],
+ ["Data 1", "Data 2", "Data 3"], "Test",
+ "x-axis label", "y-axis label")
+ ax = fig.get_axes()[0]
+ self.assertEqual(ax.get_title(), "Test")
+ self.assertEqual(ax.get_xlabel(), "x-axis label")
+ self.assertEqual(ax.get_ylabel(), "y-axis label")
+ self.assertEqual(len(ax.get_xticklabels()), 3)
+ self.assertFloatEqual(ax.get_xticks(), [1, 4, 10])
+
+ # All distributions are empty.
+ fig = generate_box_plots([[], [], []], [1, 4, 10],
+ ["Data 1", "Data 2", "Data 3"], "Test",
+ "x-axis label", "y-axis label")
+ ax = fig.get_axes()[0]
+ self.assertEqual(ax.get_title(), "Test")
+ self.assertEqual(ax.get_xlabel(), "x-axis label")
+ self.assertEqual(ax.get_ylabel(), "y-axis label")
+ self.assertEqual(len(ax.get_xticklabels()), 3)
+ self.assertFloatEqual(ax.get_xticks(), [1, 4, 10])
+
+ def test_generate_box_plots_box_colors(self):
+ """Test correctly handles coloring of box plots."""
+ # Coloring works with all empty distributions.
+ fig = generate_box_plots([[], [], []],
+ box_colors=['blue', 'red', 'yellow'])
+ ax = fig.get_axes()[0]
+ self.assertEqual(len(ax.get_xticklabels()), 3)
+
+ fig = generate_box_plots([[], [], []], box_colors='pink')
+ ax = fig.get_axes()[0]
+ self.assertEqual(len(ax.get_xticklabels()), 3)
+
+ # Coloring works with some empty distributions.
+ fig = generate_box_plots([[], [1, 2, 3.5], []],
+ box_colors=['blue', 'red', 'yellow'])
+ ax = fig.get_axes()[0]
+ self.assertEqual(len(ax.get_xticklabels()), 3)
+
def test_generate_box_plots_invalid_input(self):
"""Test correctly throws error on invalid input."""
- # Empty distribution.
- self.assertRaises(ValueError, generate_box_plots, [[1, 2, 3], []])
-
# Non-numeric entries in distribution.
self.assertRaises(ValueError, generate_box_plots, [[1, 'foo', 3]])
+ # Number of colors doesn't match number of distributions.
+ self.assertRaises(ValueError, generate_box_plots, [[1, 2, 3], [],
+ [4, 5, 6]], box_colors=['blue', 'red'])
+
# Invalid legend.
self.assertRaises(ValueError, generate_box_plots, [[1, 2, 3]],
legend=('foo', 'bar', 'baz'))
@@ -473,33 +509,27 @@ def test_generate_comparative_plots_error(self):
def test_color_box_plot(self):
"""Should not throw an exception when passed the proper input."""
- # Single color.
- fig, ax = _create_plot()
- box_plot = boxplot(self.ValidTypicalBoxData)
- _color_box_plot(ax, box_plot, 'blue')
-
- # Multiple colors.
fig, ax = _create_plot()
box_plot = boxplot(self.ValidTypicalBoxData)
_color_box_plot(ax, box_plot, ['blue', 'w', (1, 1, 0.9)])
- # Multiple colors (some are None).
+ # Some colors are None.
fig, ax = _create_plot()
box_plot = boxplot(self.ValidTypicalBoxData)
_color_box_plot(ax, box_plot, ['blue', None, (1, 1, 0.9)])
- # Multiple colors (all are None).
+ # All colors are None.
fig, ax = _create_plot()
box_plot = boxplot(self.ValidTypicalBoxData)
_color_box_plot(ax, box_plot, [None, None, None])
def test_color_box_plot_invalid_input(self):
"""Should throw an exception on invalid input."""
- # Single invalid color.
+ # Invalid color.
fig, ax = _create_plot()
box_plot = boxplot(self.ValidTypicalBoxData)
self.assertRaises(ValueError, _color_box_plot, ax, box_plot,
- 'foobarbaz')
+ ['red', 'foobarbaz', 'blue'])
# Wrong number of colors.
fig, ax = _create_plot()
View
8 tests/test_stats.py
@@ -206,7 +206,7 @@ def test_all_pairs_t_test(self):
# The nonparametric p-values contain the correct number of significant digits.
# Entries marked with "N/A" could not be calculated because at least one of the groups
# of distances was empty, both groups each contained only a single distance, or
-# the test could not be performed (e.g. no variance in the groups).
+# the test could not be performed (e.g. no variance in groups with the same mean).
Group 1 Group 2 t statistic Parametric p-value Parametric p-value (Bonferroni-corrected) Nonparametric p-value Nonparametric p-value (Bonferroni-corrected)
foo bar -6.6 0.00708047956412 0.0212414386924 0.095 0.285
foo baz -9.79795897113 0.000608184944463 0.00182455483339 0.101 0.303
@@ -220,7 +220,7 @@ def test_all_pairs_t_test_no_perms(self):
exp = """# The tests of significance were performed using a two-sided Student's two-sample t-test.
# Entries marked with "N/A" could not be calculated because at least one of the groups
# of distances was empty, both groups each contained only a single distance, or
-# the test could not be performed (e.g. no variance in the groups).
+# the test could not be performed (e.g. no variance in groups with the same mean).
Group 1 Group 2 t statistic Parametric p-value Parametric p-value (Bonferroni-corrected) Nonparametric p-value Nonparametric p-value (Bonferroni-corrected)
foo bar -6.6 0.00708047956412 0.0212414386924 N/A N/A
foo baz -9.79795897113 0.000608184944463 0.00182455483339 N/A N/A
@@ -237,7 +237,7 @@ def test_all_pairs_t_test_few_perms(self):
# The nonparametric p-values contain the correct number of significant digits.
# Entries marked with "N/A" could not be calculated because at least one of the groups
# of distances was empty, both groups each contained only a single distance, or
-# the test could not be performed (e.g. no variance in the groups).
+# the test could not be performed (e.g. no variance in groups with the same mean).
Group 1 Group 2 t statistic Parametric p-value Parametric p-value (Bonferroni-corrected) Nonparametric p-value Nonparametric p-value (Bonferroni-corrected)
foo bar -6.6 0.00354023978206 0.0106207193462 Too few iters to compute p-value (num_iters=5) Too few iters to compute p-value (num_iters=5)
foo baz -9.79795897113 0.000304092472232 0.000912277416695 Too few iters to compute p-value (num_iters=5) Too few iters to compute p-value (num_iters=5)
@@ -254,7 +254,7 @@ def test_all_pairs_t_test_invalid_tests(self):
# The nonparametric p-values contain the correct number of significant digits.
# Entries marked with "N/A" could not be calculated because at least one of the groups
# of distances was empty, both groups each contained only a single distance, or
-# the test could not be performed (e.g. no variance in the groups).
+# the test could not be performed (e.g. no variance in groups with the same mean).
Group 1 Group 2 t statistic Parametric p-value Parametric p-value (Bonferroni-corrected) Nonparametric p-value Nonparametric p-value (Bonferroni-corrected)
foo bar N/A N/A N/A N/A N/A
"""
Please sign in to comment.
Something went wrong with that request. Please try again.