Skip to content

Commit b62531e

Browse files
committed
Add option to bootstrap confidence intervals for boxplot (Paul Hobson)
svn path=/trunk/matplotlib/; revision=8127
1 parent 30daf2f commit b62531e

File tree

3 files changed

+70
-3
lines changed

3 files changed

+70
-3
lines changed

CHANGELOG

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
2010-02-11 Added 'bootstrap' option to boxplot. This allows bootstrap
2+
estimates of median confidence intervals. Based on an
3+
initial patch by Paul Hobson. - ADS
4+
15
2010-02-06 Added setup.cfg "basedirlist" option to override setting
26
in setupext.py "basedir" dictionary; added "gnu0"
37
platform requested by Benjamin Drung. - EF
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
"""Notched boxplots whose notches come from a bootstrap of the median.

Four uniformly distributed samples are drawn (the last two with a slightly
larger upper bound) and shown as notched, filled boxes; ``bootstrap=5000``
asks ``boxplot`` to resample each data set 5000 times when computing the
notch locations.
"""
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
import numpy as np

# Seed the global generator so the figure is reproducible.
np.random.seed(2)

# Upper bounds of the four uniform samples; the draws happen in this order.
inc = 0.1
upper_bounds = [1, 1, 1 + inc, 1 + 2*inc]
treatments = [np.random.uniform(0, upper, size=(500,))
              for upper in upper_bounds]
e1, e2, e3, e4 = treatments

fig = plt.figure()
ax = fig.add_subplot(111)

# One box per treatment, placed at x = 1, 2, ..., len(treatments).
pos = np.arange(len(treatments)) + 1
bp = ax.boxplot(treatments, sym='k+', patch_artist=True,
                positions=pos, notch=1, bootstrap=5000)

# Blend of data x-coordinates and axes y-coordinates; built here but not
# used by the rest of this script.
text_transform = mtransforms.blended_transform_factory(ax.transData,
                                                       ax.transAxes)

ax.set_xlabel('treatment')
ax.set_ylabel('response')
ax.set_ylim(-0.2, 1.4)

# Solid black whiskers and small flier markers.
plt.setp(bp['whiskers'], color='k', linestyle='-')
plt.setp(bp['fliers'], markersize=3.0)

fig.subplots_adjust(right=0.99, top=0.99)
plt.show()

lib/matplotlib/axes.py

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4881,7 +4881,8 @@ def xywhere(xs, ys, mask):
48814881
return (l0, caplines, barcols)
48824882

48834883
def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
4884-
positions=None, widths=None, patch_artist=False):
4884+
positions=None, widths=None, patch_artist=False,
4885+
bootstrap=None):
48854886
"""
48864887
call signature::
48874888
@@ -4910,6 +4911,16 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
49104911
a function of the interquartile range. They extend to the
49114912
most extreme data point within ( ``whis*(75%-25%)`` ) data range.
49124913
4914+
*bootstrap* (default None) specifies whether to bootstrap the
4915+
confidence intervals around the median for notched
4916+
boxplots. If bootstrap==None, no bootstrapping is performed,
4917+
and notches are calculated using a Gaussian-based asymptotic
4918+
approximation (see McGill, R., Tukey, J.W., and Larsen, W.A.,
4919+
1978, and Kendall and Stuart, 1967). Otherwise, bootstrap
4920+
specifies the number of times to bootstrap the median to
4921+
determine its 95% confidence intervals. Values between 1000
4922+
and 10000 are recommended.
4923+
49134924
*positions* (default 1,2,...,n) sets the horizontal positions of
49144925
the boxes. The ticks and limits are automatically set to match
49154926
the positions.
@@ -5021,8 +5032,33 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
50215032
med_x = [box_x_min, box_x_max]
50225033
# calculate 'notch' plot
50235034
else:
5024-
notch_max = med + 1.57*iq/np.sqrt(row)
5025-
notch_min = med - 1.57*iq/np.sqrt(row)
5035+
if bootstrap is not None:
5036+
# Do a bootstrap estimate of notch locations.
5037+
def bootstrapMedian(data, N=5000):
    """Bootstrap a 95% confidence interval for the median of *data*.

    The data are resampled with replacement *N* times (each resample has
    the same size as *data*), the median of every resample is recorded,
    and the 2.5th and 97.5th percentiles of those medians are returned.

    Parameters
    ----------
    data : array-like
        Observations; flattened to one dimension before resampling.
    N : int, optional
        Number of bootstrap resamples (default 5000).

    Returns
    -------
    numpy.ndarray
        Two-element array ``[lower, upper]`` bounding the interval.

    Raises
    ------
    ValueError
        If *data* is empty or *N* is smaller than 1.
    """
    # Accept any sequence: fancy indexing below needs an ndarray.
    data = np.asarray(data).ravel()
    M = data.size
    if M == 0:
        raise ValueError("cannot bootstrap the median of empty data")
    if N < 1:
        raise ValueError("N must be a positive number of resamples")

    percentile = [2.5, 97.5]
    estimate = np.zeros(N)
    for n in range(N):
        # randint's upper bound is exclusive, so this draws M indices in
        # [0, M-1]; it replaces the deprecated np.random.random_integers.
        bsIndex = np.random.randint(0, M, M)
        estimate[n] = np.median(data[bsIndex])
    # np.median / np.percentile replace the mlab.prctile helper, which is
    # no longer available in current Matplotlib.
    return np.percentile(estimate, percentile)
5048+
5049+
# get conf. intervals around median
5050+
CI = bootstrapMedian(d, N=bootstrap)
5051+
notch_max = CI[1]
5052+
notch_min = CI[0]
5053+
else:
5054+
# Estimate notch locations using Gaussian-based
5055+
# asymptotic approximation.
5056+
#
5057+
# For discussion: McGill, R., Tukey, J.W.,
5058+
# and Larsen, W.A. (1978) "Variations of
5059+
# Boxplots", The American Statistician, 32:12-16.
5060+
notch_max = med + 1.57*iq/np.sqrt(row)
5061+
notch_min = med - 1.57*iq/np.sqrt(row)
50265062
# make our notched box vectors
50275063
box_x = [box_x_min, box_x_max, box_x_max, cap_x_max, box_x_max,
50285064
box_x_max, box_x_min, box_x_min, cap_x_min, box_x_min,

0 commit comments

Comments
 (0)