3636import matplotlib .ticker as mticker
3737import matplotlib .transforms as mtransforms
3838import matplotlib .tri as mtri
39-
4039from matplotlib .container import BarContainer , ErrorbarContainer , StemContainer
4140
4241iterable = cbook .iterable
@@ -5470,14 +5469,15 @@ def xywhere(xs, ys, mask):
54705469
54715470 return errorbar_container # (l0, caplines, barcols)
54725471
5473- def boxplot (self , x , notch = 0 , sym = 'b+' , vert = 1 , whis = 1.5 ,
5472+ def boxplot (self , x , notch = False , sym = 'b+' , vert = True , whis = 1.5 ,
54745473 positions = None , widths = None , patch_artist = False ,
5475- bootstrap = None ):
5474+ bootstrap = None , usermedians = None , conf_intervals = None ):
54765475 """
54775476 Call signature::
54785477
5479- boxplot(x, notch=0, sym='+', vert=1, whis=1.5,
5480- positions=None, widths=None, patch_artist=False)
5478+ boxplot(x, notch=False, sym='+', vert=True, whis=1.5,
5479+ positions=None, widths=None, patch_artist=False,
5480+ bootstrap=None, usermedians=None, conf_intervals=None)
54815481
54825482 Make a box and whisker plot for each column of *x* or each
54835483 vector in sequence *x*. The box extends from the lower to
@@ -5490,59 +5490,110 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
54905490 *x* :
54915491 Array or a sequence of vectors.
54925492
5493- *notch* : [ 0 (default) | 1 ]
5494- If 0, produce a rectangular box plot.
5495- If 1, produce a notched box plot
5493+ *notch* : [ False (default) | True ]
5494+ If False (default), produces a rectangular box plot.
5495+ If True, will produce a notched box plot
54965496
5497- *sym* :
5498- (default 'b+') is the default symbol for flier points.
5497+ *sym* : [ default 'b+' ]
5498+ The default symbol for flier points.
54995499 Enter an empty string ('') if you don't want to show fliers.
55005500
5501- *vert* : [1 (default) | 0]
5502- If 1, make the boxes vertical.
5503- If 0, make horizontal boxes. (Odd, but kept for compatibility
5504- with MATLAB boxplots)
5501+ *vert* : [ False | True (default) ]
5502+ If True (default), makes the boxes vertical.
5503+ If False, makes horizontal boxes.
55055504
5506- *whis* : ( default 1.5)
5507- Defines the length of the whiskers as
5508- a function of the inner quartile range. They extend to the
5509- most extreme data point within ( ``whis*(75%-25%)`` ) data range.
5505+ *whis* : [ default 1.5 ]
5506+ Defines the length of the whiskers as a function of the inner
5507+ quartile range. They extend to the most extreme data point
5508+ within ( ``whis*(75%-25%)`` ) data range.
55105509
55115510 *bootstrap* : [ *None* (default) | integer ]
55125511 Specifies whether to bootstrap the confidence intervals
5513- around the median for notched boxplots. If *None*, no
5514- bootstrapping is performed, and notches are calculated
5515- using a Gaussian-based asymptotic approximation
5516- (see McGill, R., Tukey, J.W., and Larsen, W.A.,
5517- 1978, and Kendall and Stuart, 1967). Otherwise, bootstrap
5518- specifies the number of times to bootstrap the median to
5519- determine its 95% confidence intervals. Values between 1000
5520- and 10000 are recommended.
5521-
5522- *positions* : (default 1,2,...,n)
5523- Sets the horizontal positions of
5524- the boxes. The ticks and limits are automatically set to match
5525- the positions.
5526-
5527- *widths* : [ scalar | array ]
5528- Either a scalar or a vector to set the width of each box.
5529- The default is 0.5, or ``0.15*(distance between extreme
5530- positions)`` if that is smaller.
5531-
5532- *patch_artist* : boolean
5533- If *False* (default), produce boxes with the
5534- :class:`~matplotlib.lines.Line2D` artist.
5535- If *True*, produce boxes with the
5536- :class:`~matplotlib.patches.Patch` artist.
5512+ around the median for notched boxplots. If bootstrap==None,
5513+ no bootstrapping is performed, and notches are calculated
5514+ using a Gaussian-based asymptotic approximation (see McGill, R.,
5515+ Tukey, J.W., and Larsen, W.A., 1978, and Kendall and Stuart,
5516+ 1967). Otherwise, bootstrap specifies the number of times to
5517+ bootstrap the median to determine its 95% confidence intervals.
5518+ Values between 1000 and 10000 are recommended.
5519+
5520+ *usermedians* : [ default None ]
5521+ An array or sequence whose first dimension (or length) is
5522+ compatible with *x*. This overrides the medians computed by
5523+ matplotlib for each element of *usermedians* that is not None.
5524+ When an element of *usermedians* == None, the median will be
5525+ computed directly as normal.
5526+
5527+ *conf_intervals* : [ default None ]
5528+ Array or sequence whose first dimension (or length) is compatible
5529+ with *x* and whose second dimension is 2. When the current element
5530+ of *conf_intervals* is not None, the notch locations computed by
5531+ matplotlib are overridden (assuming notch is True). When an element of
5532+ *conf_intervals* is None, boxplot computes notches using the method
5533+ specified by the other kwargs (e.g. *bootstrap*).
5534+
5535+ *positions* : [ default 1,2,...,n ]
5536+ Sets the horizontal positions of the boxes. The ticks and limits
5537+ are automatically set to match the positions.
5538+
5539+ *widths* : [ default 0.5 ]
5540+ Either a scalar or a vector that sets the width of each box. The
5541+ default is 0.5, or ``0.15*(distance between extreme positions)``
5542+ if that is smaller.
5543+
5544+ *patch_artist* : [ False (default) | True ]
5545+ If False produces boxes with the Line2D artist
5546+ If True produces boxes with the Patch artist
55375547
55385548 Returns a dictionary mapping each component of the boxplot
5539- to a list of the :class:`~matplotlib.lines.Line2D`
5540- instances created (unless *patch_artist* was *True*. See above.).
5549+ to a list of the :class:`matplotlib.lines.Line2D`
5550+ instances created. That dictionary has the following keys
5551+ (assuming vertical boxplots):
5552+ boxes: the main body of the boxplot showing the quartiles
5553+ and the median's confidence intervals if enabled.
5554+ medians: horizontal lines at the median of each box.
5555+ whiskers: the vertical lines extending to the most extreme,
5556+ non-outlier data points.
5557+ caps: the horizontal lines at the ends of the whiskers.
5558+ fliers: points representing data that extend beyond the
5559+ whiskers (outliers).
5560+
55415561
55425562 **Example:**
55435563
55445564 .. plot:: pyplots/boxplot_demo.py
55455565 """
def bootstrapMedian(data, N=5000):
    """
    Bootstrap a confidence interval for the median of *data*.

    *data* is resampled with replacement *N* times, each resample being
    the same length as *data*.  The 50th percentile of every resample is
    recorded, and the 2.5 and 97.5 percentiles of those recorded values
    (as computed by ``mlab.prctile``) are returned.
    """
    # The fancy indexing below needs an array; converting here lets plain
    # sequences be passed as well.
    data = np.asarray(data)
    M = len(data)
    percentile = [2.5, 97.5]
    estimate = np.zeros(N)
    for n in range(N):
        # randint's upper bound is exclusive, so (0, M) draws indices from
        # 0..M-1 -- the same range the deprecated
        # random_integers(0, M - 1, M) call covered.
        bsIndex = np.random.randint(0, M, M)
        estimate[n] = mlab.prctile(data[bsIndex], 50)
    return mlab.prctile(estimate, percentile)
5577+
def computeConfInterval(data, med, iq, bootstrap):
    """
    Return ``(notch_min, notch_max)`` for the notch around the median.

    If *bootstrap* is None the bounds are ``med -/+ 1.57 * iq / sqrt(n)``
    with ``n = len(data)``.  Otherwise *bootstrap* is forwarded as the
    resample count to ``bootstrapMedian`` and the first two entries of
    its result are used as the lower and upper bound.
    """
    if bootstrap is None:
        # Gaussian-based asymptotic approximation of the notch extent.
        #
        # For discussion: McGill, R., Tukey, J.W., and Larsen, W.A.
        # (1978) "Variations of Boxplots", The American Statistician,
        # 32:12-16.
        half_width = 1.57 * iq / np.sqrt(len(data))
        return med - half_width, med + half_width

    # Bootstrap estimate of the confidence interval around the median.
    interval = bootstrapMedian(data, N=bootstrap)
    return interval[0], interval[1]
5596+
55465597 if not self ._hold : self .cla ()
55475598 holdStatus = self ._hold
55485599 whiskers , caps , boxes , medians , fliers = [], [], [], [], []
@@ -5568,6 +5619,38 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
55685619 x = [x ]
55695620 col = len (x )
55705621
5622+ # sanitize user-input medians
5623+ msg1 = "usermedians must either be a list/tuple or a 1d array"
5624+ msg2 = "usermedians' length must be compatible with x"
5625+ if usermedians is not None :
5626+ if hasattr (usermedians , 'shape' ):
5627+ if len (usermedians .shape ) != 1 :
5628+ raise ValueError (msg1 )
5629+ elif usermedians .shape [0 ] != col :
5630+ raise ValueError (msg2 )
5631+ elif len (usermedians ) != col :
5632+ raise ValueError (msg2 )
5633+
# Sanitize user-input confidence intervals.
#
# An object exposing ``shape`` must be 2-d, with one row per entry of x
# (``col``) and exactly two columns.  Anything else is treated as a
# sequence of length ``col`` whose entries are either None or a pair.
msg1 = "conf_intervals must either be a list of tuples or a 2d array"
msg2 = "conf_intervals' length must be compatible with x"
msg3 = "each conf_interval, if specificied, must have two values"
if conf_intervals is not None:
    if hasattr(conf_intervals, 'shape'):
        if len(conf_intervals.shape) != 2:
            raise ValueError(msg1)
        elif conf_intervals.shape[0] != col:
            raise ValueError(msg2)
        # Reject a second dimension that is NOT 2; the previous ``== 2``
        # test raised for every correctly shaped (col, 2) array.
        elif conf_intervals.shape[1] != 2:
            raise ValueError(msg3)
    else:
        if len(conf_intervals) != col:
            raise ValueError(msg2)
        for ci in conf_intervals:
            # None entries mean "compute this notch as usual".
            if ci is not None and len(ci) != 2:
                raise ValueError(msg3)
5652+
5653+
55715654 # get some plot info
55725655 if positions is None :
55735656 positions = range (1 , col + 1 )
@@ -5579,14 +5662,21 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
55795662
55805663 # loop through columns, adding each to plot
55815664 self .hold (True )
5582- for i ,pos in enumerate (positions ):
5665+ for i , pos in enumerate (positions ):
55835666 d = np .ravel (x [i ])
55845667 row = len (d )
55855668 if row == 0 :
55865669 # no data, skip this position
55875670 continue
5671+
55885672 # get median and quartiles
55895673 q1 , med , q3 = mlab .prctile (d ,[25 ,50 ,75 ])
5674+
5675+ # replace with input medians if available
5676+ if usermedians is not None :
5677+ if usermedians [i ] is not None :
5678+ med = usermedians [i ]
5679+
55905680 # get high extreme
55915681 iq = q3 - q1
55925682 hi_val = q3 + whis * iq
@@ -5626,42 +5716,16 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
56265716 # get y location for median
56275717 med_y = [med , med ]
56285718
5629- # calculate 'regular' plot
5630- if notch == 0 :
5631- # make our box vectors
5632- box_x = [box_x_min , box_x_max , box_x_max , box_x_min , box_x_min ]
5633- box_y = [q1 , q1 , q3 , q3 , q1 ]
5634- # make our median line vectors
5635- med_x = [box_x_min , box_x_max ]
56365719 # calculate 'notch' plot
5637- else :
5638- if bootstrap is not None :
5639- # Do a bootstrap estimate of notch locations.
5640- def bootstrapMedian (data , N = 5000 ):
5641- # determine 95% confidence intervals of the median
5642- M = len (data )
5643- percentile = [2.5 ,97.5 ]
5644- estimate = np .zeros (N )
5645- for n in range (N ):
5646- bsIndex = np .random .random_integers (0 ,M - 1 ,M )
5647- bsData = data [bsIndex ]
5648- estimate [n ] = mlab .prctile (bsData , 50 )
5649- CI = mlab .prctile (estimate , percentile )
5650- return CI
5651-
5652- # get conf. intervals around median
5653- CI = bootstrapMedian (d , N = bootstrap )
5654- notch_max = CI [1 ]
5655- notch_min = CI [0 ]
5720+ if notch :
5721+ # conf. intervals from user, if available
5722+ if conf_intervals is not None and conf_intervals [i ] is not None :
5723+ notch_max = np .max (conf_intervals [i ])
5724+ notch_min = np .min (conf_intervals [i ])
56565725 else :
5657- # Estimate notch locations using Gaussian-based
5658- # asymptotic approximation.
5659- #
5660- # For discussion: McGill, R., Tukey, J.W.,
5661- # and Larsen, W.A. (1978) "Variations of
5662- # Boxplots", The American Statistician, 32:12-16.
5663- notch_max = med + 1.57 * iq / np .sqrt (row )
5664- notch_min = med - 1.57 * iq / np .sqrt (row )
5726+ notch_min , notch_max = computeConfInterval (d , med , iq ,
5727+ bootstrap )
5728+
56655729 # make our notched box vectors
56665730 box_x = [box_x_min , box_x_max , box_x_max , cap_x_max , box_x_max ,
56675731 box_x_max , box_x_min , box_x_min , cap_x_min , box_x_min ,
@@ -5671,6 +5735,13 @@ def bootstrapMedian(data, N=5000):
56715735 # make our median line vectors
56725736 med_x = [cap_x_min , cap_x_max ]
56735737 med_y = [med , med ]
5738+ # calculate 'regular' plot
5739+ else :
5740+ # make our box vectors
5741+ box_x = [box_x_min , box_x_max , box_x_max , box_x_min , box_x_min ]
5742+ box_y = [q1 , q1 , q3 , q3 , q1 ]
5743+ # make our median line vectors
5744+ med_x = [box_x_min , box_x_max ]
56745745
56755746 def to_vc (xs ,ys ):
56765747 # convert arguments to verts and codes
@@ -5720,12 +5791,13 @@ def dopatch(xs,ys):
57205791 boxes .extend (dopatch (box_x , box_y ))
57215792 else :
57225793 boxes .extend (doplot (box_x , box_y , 'b-' ))
5794+
57235795 medians .extend (doplot (med_x , med_y , median_color + '-' ))
57245796 fliers .extend (doplot (flier_hi_x , flier_hi , sym ,
57255797 flier_lo_x , flier_lo , sym ))
57265798
57275799 # fix our axes/ticks up a little
5728- if 1 == vert :
5800+ if vert :
57295801 setticks , setlim = self .set_xticks , self .set_xlim
57305802 else :
57315803 setticks , setlim = self .set_yticks , self .set_ylim
0 commit comments