Skip to content

Commit 447a7cc

Browse files
committed
Merged @phobson's boxplot enhancements.
1 parent 41822d5 commit 447a7cc

File tree

8 files changed

+670
-93
lines changed

8 files changed

+670
-93
lines changed

doc/users/whats_new.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,18 @@ Two new defaults are available in the matplotlibrc configuration file.
6565
These are savefig.bbox, which can be set to 'standard' or 'tight,' and
6666
savefig.pad_inches, which controls the bounding box padding.
6767

68+
69+
New Boxplot Functionality
70+
-------------------------
71+
72+
Users can now incorporate their own methods for computing the median and its
73+
confidence intervals into the boxplot method. For every column of data passed
74+
to boxplot, the user can specify an accompanying median and confidence
75+
interval.
76+
:meth:`matplotlib.axes.boxplot`
77+
.. plot:: examples/pylab_examples/boxplot_demo3.py
78+
79+
6880
.. _whats-new-1-1:
6981

7082
new in matplotlib-1.1

examples/pylab_examples/boxplot_demo3.py

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,48 @@
22
import matplotlib.transforms as mtransforms
33
import numpy as np
44

5+
def fakeBootStrapper(n):
    """
    Stand-in for a user-supplied routine that bootstraps the median
    and its confidence interval.

    Returns a ``(median, (ci_low, ci_high))`` tuple of arbitrary,
    hard-coded values: one pair when *n* is 1, another pair for any
    other *n*.
    """
    if n == 1:
        return 0.1, (-0.25, 0.25)

    return 0.2, (-0.35, 0.50)
21+
22+
23+
524
np.random.seed(2)
625
inc = 0.1
7-
e1 = np.random.uniform(0,1, size=(500,))
8-
e2 = np.random.uniform(0,1, size=(500,))
9-
e3 = np.random.uniform(0,1 + inc, size=(500,))
10-
e4 = np.random.uniform(0,1 + 2*inc, size=(500,))
26+
e1 = np.random.normal(0, 1, size=(500,))
27+
e2 = np.random.normal(0, 1, size=(500,))
28+
e3 = np.random.normal(0, 1 + inc, size=(500,))
29+
e4 = np.random.normal(0, 1 + 2*inc, size=(500,))
1130

1231
treatments = [e1,e2,e3,e4]
32+
med1, CI1 = fakeBootStrapper(1)
33+
med2, CI2 = fakeBootStrapper(2)
34+
medians = [None, None, med1, med2]
35+
conf_intervals = [None, None, CI1, CI2]
1336

1437
fig = plt.figure()
1538
ax = fig.add_subplot(111)
1639
pos = np.array(range(len(treatments)))+1
17-
bp = ax.boxplot( treatments, sym='k+', patch_artist=True,
18-
positions=pos, notch=1, bootstrap=5000 )
19-
text_transform= mtransforms.blended_transform_factory(ax.transData,
20-
ax.transAxes)
40+
bp = ax.boxplot(treatments, sym='k+', positions=pos,
41+
notch=1, bootstrap=5000,
42+
usermedians=medians,
43+
conf_intervals=conf_intervals)
44+
2145
ax.set_xlabel('treatment')
2246
ax.set_ylabel('response')
23-
ax.set_ylim(-0.2, 1.4)
2447
plt.setp(bp['whiskers'], color='k', linestyle='-' )
2548
plt.setp(bp['fliers'], markersize=3.0)
26-
fig.subplots_adjust(right=0.99,top=0.99)
2749
plt.show()

lib/matplotlib/axes.py

Lines changed: 152 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import matplotlib.ticker as mticker
3737
import matplotlib.transforms as mtransforms
3838
import matplotlib.tri as mtri
39-
4039
from matplotlib.container import BarContainer, ErrorbarContainer, StemContainer
4140

4241
iterable = cbook.iterable
@@ -5470,14 +5469,15 @@ def xywhere(xs, ys, mask):
54705469

54715470
return errorbar_container # (l0, caplines, barcols)
54725471

5473-
def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
5472+
def boxplot(self, x, notch=False, sym='b+', vert=True, whis=1.5,
54745473
positions=None, widths=None, patch_artist=False,
5475-
bootstrap=None):
5474+
bootstrap=None, usermedians=None, conf_intervals=None):
54765475
"""
54775476
Call signature::
54785477
5479-
boxplot(x, notch=0, sym='+', vert=1, whis=1.5,
5480-
positions=None, widths=None, patch_artist=False)
5478+
boxplot(x, notch=False, sym='+', vert=True, whis=1.5,
5479+
positions=None, widths=None, patch_artist=False,
5480+
bootstrap=None, usermedians=None, conf_intervals=None)
54815481
54825482
Make a box and whisker plot for each column of *x* or each
54835483
vector in sequence *x*. The box extends from the lower to
@@ -5490,59 +5490,110 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
54905490
*x* :
54915491
Array or a sequence of vectors.
54925492
5493-
*notch* : [ 0 (default) | 1]
5494-
If 0, produce a rectangular box plot.
5495-
If 1, produce a notched box plot
5493+
*notch* : [ False (default) | True ]
5494+
If False (default), produces a rectangular box plot.
5495+
If True, will produce a notched box plot
54965496
5497-
*sym* :
5498-
(default 'b+') is the default symbol for flier points.
5497+
*sym* : [ default 'b+' ]
5498+
The default symbol for flier points.
54995499
Enter an empty string ('') if you don't want to show fliers.
55005500
5501-
*vert* : [1 (default) | 0]
5502-
If 1, make the boxes vertical.
5503-
If 0, make horizontal boxes. (Odd, but kept for compatibility
5504-
with MATLAB boxplots)
5501+
*vert* : [ False | True (default) ]
5502+
If True (default), makes the boxes vertical.
5503+
If False, makes horizontal boxes.
55055504
5506-
*whis* : (default 1.5)
5507-
Defines the length of the whiskers as
5508-
a function of the inner quartile range. They extend to the
5509-
most extreme data point within ( ``whis*(75%-25%)`` ) data range.
5505+
*whis* : [ default 1.5 ]
5506+
Defines the length of the whiskers as a function of the inner
5507+
quartile range. They extend to the most extreme data point
5508+
within ( ``whis*(75%-25%)`` ) data range.
55105509
55115510
*bootstrap* : [ *None* (default) | integer ]
55125511
Specifies whether to bootstrap the confidence intervals
5513-
around the median for notched boxplots. If *None*, no
5514-
bootstrapping is performed, and notches are calculated
5515-
using a Gaussian-based asymptotic approximation
5516-
(see McGill, R., Tukey, J.W., and Larsen, W.A.,
5517-
1978, and Kendall and Stuart, 1967). Otherwise, bootstrap
5518-
specifies the number of times to bootstrap the median to
5519-
determine its 95% confidence intervals. Values between 1000
5520-
and 10000 are recommended.
5521-
5522-
*positions* : (default 1,2,...,n)
5523-
Sets the horizontal positions of
5524-
the boxes. The ticks and limits are automatically set to match
5525-
the positions.
5526-
5527-
*widths* : [ scalar | array ]
5528-
Either a scalar or a vector to set the width of each box.
5529-
The default is 0.5, or ``0.15*(distance between extreme
5530-
positions)`` if that is smaller.
5531-
5532-
*patch_artist* : boolean
5533-
If *False* (default), produce boxes with the
5534-
:class:`~matplotlib.lines.Line2D` artist.
5535-
If *True*, produce boxes with the
5536-
:class:`~matplotlib.patches.Patch` artist.
5512+
around the median for notched boxplots. If bootstrap==None,
5513+
no bootstrapping is performed, and notches are calculated
5514+
using a Gaussian-based asymptotic approximation (see McGill, R.,
5515+
Tukey, J.W., and Larsen, W.A., 1978, and Kendall and Stuart,
5516+
1967). Otherwise, bootstrap specifies the number of times to
5517+
bootstrap the median to determine its 95% confidence intervals.
5518+
Values between 1000 and 10000 are recommended.
5519+
5520+
*usermedians* : [ default None ]
5521+
An array or sequence whose first dimension (or length) is
5522+
compatible with *x*. This overrides the medians computed by
5523+
matplotlib for each element of *usermedians* that is not None.
5524+
When an element of *usermedians* == None, the median will be
5525+
computed directly as normal.
5526+
5527+
*conf_intervals* : [ default None ]
5528+
Array or sequence whose first dimension (or length) is compatible
5529+
with *x* and whose second dimension is 2. When the current element
5530+
of *conf_intervals* is not None, the notch locations computed by
5531+
matplotlib are overridden (assuming notch is True). When an element of
5532+
*conf_intervals* is None, boxplot computes notches using the method
5533+
specified by the other kwargs (e.g. *bootstrap*).
5534+
5535+
*positions* : [ default 1,2,...,n ]
5536+
Sets the horizontal positions of the boxes. The ticks and limits
5537+
are automatically set to match the positions.
5538+
5539+
*widths* : [ default 0.5 ]
5540+
Either a scalar or a vector and sets the width of each box. The
5541+
default is 0.5, or ``0.15*(distance between extreme positions)``
5542+
if that is smaller.
5543+
5544+
*patch_artist* : [ False (default) | True ]
5545+
If False produces boxes with the Line2D artist
5546+
If True produces boxes with the Patch artist
55375547
55385548
Returns a dictionary mapping each component of the boxplot
5539-
to a list of the :class:`~matplotlib.lines.Line2D`
5540-
instances created (unless *patch_artist* was *True*. See above.).
5549+
to a list of the :class:`matplotlib.lines.Line2D`
5550+
instances created. That dictionary has the following keys
5551+
(assuming vertical boxplots):
5552+
boxes: the main body of the boxplot showing the quartiles
5553+
and the median's confidence intervals if enabled.
5554+
medians: horizontal lines at the median of each box.
5555+
whiskers: the vertical lines extending to the most extreme,
5556+
non-outlier data points.
5557+
caps: the horizontal lines at the ends of the whiskers.
5558+
fliers: points representing data that extend beyond the
5559+
whiskers (outliers).
5560+
55415561
55425562
**Example:**
55435563
55445564
.. plot:: pyplots/boxplot_demo.py
55455565
"""
5566+
def bootstrapMedian(data, N=5000):
    """
    Bootstrap the 95% confidence interval of the median of *data*.

    *data* is resampled with replacement *N* times (each resample has
    the same length as *data*), the 50th percentile of every resample
    is recorded, and the 2.5th and 97.5th percentiles of those
    recorded medians are returned, as computed by ``mlab.prctile``.

    Assumes *data* supports NumPy integer-array indexing (the caller
    is expected to pass a 1-D array).
    """
    M = len(data)
    percentile = [2.5, 97.5]
    estimate = np.zeros(N)
    for n in range(N):
        # randint's upper bound is exclusive, so this draws M indices
        # from 0..M-1 -- the same range as the deprecated
        # np.random.random_integers(0, M-1, M) call it replaces.
        bsIndex = np.random.randint(0, M, M)
        bsData = data[bsIndex]
        estimate[n] = mlab.prctile(bsData, 50)
    CI = mlab.prctile(estimate, percentile)
    return CI
5577+
5578+
def computeConfInterval(data, med, iq, bootstrap):
    """
    Return ``(notch_min, notch_max)`` for the notch around *med*.

    When *bootstrap* is None the notch is placed at
    ``med -/+ 1.57*iq/sqrt(len(data))``, the Gaussian-based
    asymptotic approximation.  For discussion see McGill, R.,
    Tukey, J.W., and Larsen, W.A. (1978) "Variations of Boxplots",
    The American Statistician, 32:12-16.

    Otherwise *bootstrap* is passed to ``bootstrapMedian`` as the
    number of resamples and the interval it returns is used.
    """
    if bootstrap is None:
        # Gaussian-based asymptotic approximation.
        half_width = 1.57*iq/np.sqrt(len(data))
        return med - half_width, med + half_width

    # Bootstrap estimate of the conf. interval around the median.
    lower_upper = bootstrapMedian(data, N=bootstrap)
    return lower_upper[0], lower_upper[1]
5596+
55465597
if not self._hold: self.cla()
55475598
holdStatus = self._hold
55485599
whiskers, caps, boxes, medians, fliers = [], [], [], [], []
@@ -5568,6 +5619,38 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
55685619
x = [x]
55695620
col = len(x)
55705621

5622+
# sanitize user-input medians
5623+
msg1 = "usermedians must either be a list/tuple or a 1d array"
5624+
msg2 = "usermedians' length must be compatible with x"
5625+
if usermedians is not None:
5626+
if hasattr(usermedians, 'shape'):
5627+
if len(usermedians.shape) != 1:
5628+
raise ValueError(msg1)
5629+
elif usermedians.shape[0] != col:
5630+
raise ValueError(msg2)
5631+
elif len(usermedians) != col:
5632+
raise ValueError(msg2)
5633+
5634+
# sanitize user-input confidence intervals
# conf_intervals must supply one entry per column of x; every entry
# that is not None must hold exactly two values.
msg1 = "conf_intervals must either be a list of tuples or a 2d array"
msg2 = "conf_intervals' length must be compatible with x"
msg3 = "each conf_interval, if specificied, must have two values"
if conf_intervals is not None:
    if hasattr(conf_intervals, 'shape'):
        # array-like input: must be 2d with shape (col, 2)
        if len(conf_intervals.shape) != 2:
            raise ValueError(msg1)
        elif conf_intervals.shape[0] != col:
            raise ValueError(msg2)
        elif conf_intervals.shape[1] != 2:
            # reject a second dimension that is NOT 2 (the previous
            # `== 2` test rejected exactly the valid shape)
            raise ValueError(msg3)
    else:
        # plain sequence input: one entry per column, None allowed
        if len(conf_intervals) != col:
            raise ValueError(msg2)
        for ci in conf_intervals:
            if ci is not None and len(ci) != 2:
                raise ValueError(msg3)
5652+
5653+
55715654
# get some plot info
55725655
if positions is None:
55735656
positions = range(1, col + 1)
@@ -5579,14 +5662,21 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
55795662

55805663
# loop through columns, adding each to plot
55815664
self.hold(True)
5582-
for i,pos in enumerate(positions):
5665+
for i, pos in enumerate(positions):
55835666
d = np.ravel(x[i])
55845667
row = len(d)
55855668
if row==0:
55865669
# no data, skip this position
55875670
continue
5671+
55885672
# get median and quartiles
55895673
q1, med, q3 = mlab.prctile(d,[25,50,75])
5674+
5675+
# replace with input medians if available
5676+
if usermedians is not None:
5677+
if usermedians[i] is not None:
5678+
med = usermedians[i]
5679+
55905680
# get high extreme
55915681
iq = q3 - q1
55925682
hi_val = q3 + whis*iq
@@ -5626,42 +5716,16 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
56265716
# get y location for median
56275717
med_y = [med, med]
56285718

5629-
# calculate 'regular' plot
5630-
if notch == 0:
5631-
# make our box vectors
5632-
box_x = [box_x_min, box_x_max, box_x_max, box_x_min, box_x_min ]
5633-
box_y = [q1, q1, q3, q3, q1 ]
5634-
# make our median line vectors
5635-
med_x = [box_x_min, box_x_max]
56365719
# calculate 'notch' plot
5637-
else:
5638-
if bootstrap is not None:
5639-
# Do a bootstrap estimate of notch locations.
5640-
def bootstrapMedian(data, N=5000):
5641-
# determine 95% confidence intervals of the median
5642-
M = len(data)
5643-
percentile = [2.5,97.5]
5644-
estimate = np.zeros(N)
5645-
for n in range(N):
5646-
bsIndex = np.random.random_integers(0,M-1,M)
5647-
bsData = data[bsIndex]
5648-
estimate[n] = mlab.prctile(bsData, 50)
5649-
CI = mlab.prctile(estimate, percentile)
5650-
return CI
5651-
5652-
# get conf. intervals around median
5653-
CI = bootstrapMedian(d, N=bootstrap)
5654-
notch_max = CI[1]
5655-
notch_min = CI[0]
5720+
if notch:
5721+
# conf. intervals from user, if available
5722+
if conf_intervals is not None and conf_intervals[i] is not None:
5723+
notch_max = np.max(conf_intervals[i])
5724+
notch_min = np.min(conf_intervals[i])
56565725
else:
5657-
# Estimate notch locations using Gaussian-based
5658-
# asymptotic approximation.
5659-
#
5660-
# For discussion: McGill, R., Tukey, J.W.,
5661-
# and Larsen, W.A. (1978) "Variations of
5662-
# Boxplots", The American Statistician, 32:12-16.
5663-
notch_max = med + 1.57*iq/np.sqrt(row)
5664-
notch_min = med - 1.57*iq/np.sqrt(row)
5726+
notch_min, notch_max = computeConfInterval(d, med, iq,
5727+
bootstrap)
5728+
56655729
# make our notched box vectors
56665730
box_x = [box_x_min, box_x_max, box_x_max, cap_x_max, box_x_max,
56675731
box_x_max, box_x_min, box_x_min, cap_x_min, box_x_min,
@@ -5671,6 +5735,13 @@ def bootstrapMedian(data, N=5000):
56715735
# make our median line vectors
56725736
med_x = [cap_x_min, cap_x_max]
56735737
med_y = [med, med]
5738+
# calculate 'regular' plot
5739+
else:
5740+
# make our box vectors
5741+
box_x = [box_x_min, box_x_max, box_x_max, box_x_min, box_x_min ]
5742+
box_y = [q1, q1, q3, q3, q1 ]
5743+
# make our median line vectors
5744+
med_x = [box_x_min, box_x_max]
56745745

56755746
def to_vc(xs,ys):
56765747
# convert arguments to verts and codes
@@ -5720,12 +5791,13 @@ def dopatch(xs,ys):
57205791
boxes.extend(dopatch(box_x, box_y))
57215792
else:
57225793
boxes.extend(doplot(box_x, box_y, 'b-'))
5794+
57235795
medians.extend(doplot(med_x, med_y, median_color+'-'))
57245796
fliers.extend(doplot(flier_hi_x, flier_hi, sym,
57255797
flier_lo_x, flier_lo, sym))
57265798

57275799
# fix our axes/ticks up a little
5728-
if 1 == vert:
5800+
if vert:
57295801
setticks, setlim = self.set_xticks, self.set_xlim
57305802
else:
57315803
setticks, setlim = self.set_yticks, self.set_ylim

0 commit comments

Comments
 (0)