You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
When creating a boxplot of a continuous value against a categorical x axis, plotting fails if any category has only one sample in the data frame.
E.g. this works fine
df=pd.DataFrame(
{
'weight': np.random.normal(size=20),
# Creating two categories, one with 18 samples, one with 2 samples
'category': pd.Categorical(18 * [0] + 2 * [1], categories=[0, 1], ordered=True)
}
)
(
ggplot(df, aes(x='category', y='weight'))
+geom_boxplot()
)
Producing the following plot:
However, this example fails:
df=pd.DataFrame(
{
'weight': np.random.normal(size=20),
# Creating two categories, one with 19 samples, one with 1 sample
'category': pd.Categorical(19 * [0] + 1 * [1], categories=[0, 1], ordered=True)
}
)
(
ggplot(df, aes(x='category', y='weight'))
+geom_boxplot()
)
Below is the trace from the error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/.virtualenvs/pandas/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
691 type_pprinters=self.type_printers,
692 deferred_pprinters=self.deferred_printers)
--> 693 printer.pretty(obj)
694 printer.flush()
695 return stream.getvalue()
~/.virtualenvs/pandas/lib/python3.6/site-packages/IPython/lib/pretty.py in pretty(self, obj)
378 if callable(meth):
379 return meth(obj, self, cycle)
--> 380 return _default_pprint(obj, self, cycle)
381 finally:
382 self.end_group()
~/.virtualenvs/pandas/lib/python3.6/site-packages/IPython/lib/pretty.py in _default_pprint(obj, p, cycle)
493 if _safe_getattr(klass, '__repr__', None) is not object.__repr__:
494 # A user-provided repr. Find newlines and replace them with p.break_()
--> 495 _repr_pprint(obj, p, cycle)
496 return
497 p.begin_group(1, '<')
~/.virtualenvs/pandas/lib/python3.6/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
691 """A pprint that just redirects to the normal repr function."""
692 # Find newlines and replace them with p.break_()
--> 693 output = repr(obj)
694 for idx,output_line in enumerate(output.splitlines()):
695 if idx:
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/ggplot.py in __repr__(self)
81 Print/show the plot
82 """
---> 83 self.draw()
84 plt.show()
85 return '<ggplot: (%d)>' % self.__hash__()
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/ggplot.py in draw(self)
138 # assign a default theme
139 self = deepcopy(self)
--> 140 self._build()
141
142 # If no theme we use the default
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/ggplot.py in _build(self)
234
235 # Apply and map statistics
--> 236 layers.compute_statistic(layout)
237 layers.map_statistic(self)
238
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/layer.py in compute_statistic(self, layout)
92 def compute_statistic(self, layout):
93 for l in self:
---> 94 l.compute_statistic(layout)
95
96 def map_statistic(self, plot):
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/layer.py in compute_statistic(self, layout)
369 data = self.stat.use_defaults(data)
370 data = self.stat.setup_data(data)
--> 371 data = self.stat.compute_layer(data, params, layout)
372 self.data = data
373
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/stats/stat.py in compute_layer(cls, data, params, layout)
194 return cls.compute_panel(pdata, pscales, **params)
195
--> 196 return groupby_apply(data, 'PANEL', fn)
197
198 @classmethod
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/utils.py in groupby_apply(df, cols, func, *args, **kwargs)
615 # do not mark d as a slice of df i.e no SettingWithCopyWarning
616 d.is_copy = None
--> 617 lst.append(func(d, *args, **kwargs))
618 return pd.concat(lst, axis=axis, ignore_index=True)
619
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/stats/stat.py in fn(pdata)
192 return pdata
193 pscales = layout.get_scales(pdata['PANEL'].iat[0])
--> 194 return cls.compute_panel(pdata, pscales, **params)
195
196 return groupby_apply(data, 'PANEL', fn)
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/stats/stat.py in compute_panel(cls, data, scales, **params)
221 for _, old in data.groupby('group'):
222 old.is_copy = None
--> 223 new = cls.compute_group(old, scales, **params)
224 unique = uniquecols(old)
225 missing = unique.columns.difference(new.columns)
~/.virtualenvs/pandas/lib/python3.6/site-packages/plotnine/stats/stat_boxplot.py in compute_group(cls, data, scales, **params)
69 labels = ['x', 'y']
70 X = np.array(data[labels])
---> 71 res = boxplot_stats(X, whis=params['coef'], labels=labels)[1]
72 try:
73 n = data['weight'].sum()
~/.virtualenvs/pandas/lib/python3.6/site-packages/matplotlib/cbook.py in boxplot_stats(X, whis, bootstrap, labels, autorange)
1998 labels = repeat(None)
1999 elif len(labels) != ncols:
-> 2000 raise ValueError("Dimensions of labels and X must be compatible")
2001
2002 input_whis = whis
ValueError: Dimensions of labels and X must be compatible
This can be worked around by removing the categories that have only a single sample from the data frame and drawing just those samples with an overlaid geom_point(), since a boxplot of a single sample is not informative anyway. However, that is a bit of a hassle, and it would be nicer if it just worked.
The text was updated successfully, but these errors were encountered:
When creating a boxplot of a continuous value against a categorical x axis, plotting fails if any category has only one sample in the data frame.
E.g. this works fine
Producing the following plot:
However, this example fails:
Below is the trace from the error:
This can be worked around by removing the classes with only a single sample from the data frame and overlaying only those with a geom_point(), as there is no interesting boxplot for them anyway, but it's a bit of a hassle and would be nicer if it just worked. The text was updated successfully, but these errors were encountered: