/
facet_grid.py
320 lines (262 loc) · 10.9 KB
/
facet_grid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import re
import pandas as pd
import six
from ..utils import ninteraction, add_margins, cross_join
from ..utils import match, join_keys
from ..exceptions import PlotnineError
from .facet import facet, layout_null, combine_vars, add_missing_facets
from .facet import eval_facet_vars
class facet_grid(facet):
    """
    Lay out panels in a grid of rows and columns

    Parameters
    ----------
    facets : formula | tuple | list
        A formula with the rows (of the tabular display) on
        the LHS and the columns (of the tabular display) on
        the RHS; the dot in the formula is used to indicate
        there should be no faceting on this dimension
        (either row or column). If a tuple/list is used, it
        must be of size two, the elements of which must be
        strings or lists.
    margins : bool | object
        Margin specification. It is stored on the instance and
        forwarded unchanged to ``add_margins``; when it is falsy
        no margin panels are added to the layout.
        Default is ``False``.
    scales : 'fixed' | 'free' | 'free_x' | 'free_y'
        Whether ``x`` or ``y`` scales should be allowed (free)
        to vary according to the data on each of the panel.
        Default is ``'fixed'``.
    space : 'fixed' | 'free' | 'free_x' | 'free_y'
        Whether the ``x`` or ``y`` sides of the panels
        should have the same size. It also depends on the
        ``scales`` parameter. Default is ``'fixed'``.
        This setting is not properly supported at the moment.
    shrink : bool
        Whether to shrink the scales to the output of the
        statistics instead of the raw data. Default is ``True``.
    labeller : str | function
        How to label the facets. If it is a ``str``, it should
        be one of ``'label_value'`` ``'label_both'`` or
        ``'label_context'``. Default is ``'label_value'``
    as_table : bool
        If ``True``, the facets are laid out like a table with
        the highest values at the bottom-right. If ``False``
        the facets are laid out like a plot with the highest
        value at the top-right. Default is ``True``.
    drop : bool
        If ``True``, all factor levels not used in the data
        will automatically be dropped. If ``False``, all
        factor levels will be shown, regardless of whether
        or not they appear in the data. Default is ``True``.
    """

    def __init__(self, facets, margins=False, scales='fixed',
                 space='fixed', shrink=True, labeller='label_value',
                 as_table=True, drop=True):
        # Explicit base-class call (rather than zero-argument super())
        # keeps the module usable on Python 2, which the __future__
        # and six imports indicate is still supported.
        facet.__init__(
            self, scales=scales, shrink=shrink, labeller=labeller,
            as_table=as_table, drop=drop)
        self.rows, self.cols = parse_grid_facets(facets)
        self.margins = margins
        self.space_free = {'x': space in ('free_x', 'free'),
                           'y': space in ('free_y', 'free')}
        self.num_vars_x = len(self.cols)
        self.num_vars_y = len(self.rows)

    def compute_layout(self, data):
        """
        Compute the table that describes where each panel goes

        Parameters
        ----------
        data : object
            Data handed to ``combine_vars`` to discover the
            combinations of the row and column variables.

        Returns
        -------
        layout : dataframe
            One row per panel with the columns ``PANEL``, ``ROW``,
            ``COL``, the facetting variables, ``SCALE_X`` and
            ``SCALE_Y``. When there are no facetting variables the
            result of ``layout_null()`` is returned instead.

        Notes
        -----
        Sets ``self.nrow`` and ``self.ncol`` as a side effect.
        """
        if not self.rows and not self.cols:
            return layout_null()

        base_rows = combine_vars(data, self.plot.environment,
                                 self.rows, drop=self.drop)

        if not self.as_table:
            # Reverse the order of the rows
            base_rows = base_rows[::-1]

        base_cols = combine_vars(data, self.plot.environment,
                                 self.cols, drop=self.drop)

        base = cross_join(base_rows, base_cols)

        if self.margins:
            base = add_margins(base, [self.rows, self.cols], self.margins)
            base = base.drop_duplicates().reset_index(drop=True)

        n = len(base)
        panel = ninteraction(base, drop=True)
        panel = pd.Categorical(panel, categories=range(1, n+1))

        # A dimension with no facetting variable collapses to a
        # single row (or column) numbered 1
        if self.rows:
            rows = ninteraction(base[self.rows], drop=True)
        else:
            rows = 1

        if self.cols:
            cols = ninteraction(base[self.cols], drop=True)
        else:
            cols = 1

        layout = pd.DataFrame({'PANEL': panel,
                               'ROW': rows,
                               'COL': cols})
        layout = pd.concat([layout, base], axis=1)
        layout = layout.sort_values('PANEL')
        layout.reset_index(drop=True, inplace=True)

        # Relax constraints, if necessary. In a grid a free x scale
        # is shared down a column and a free y scale across a row.
        # (self.free is not set in this class; presumably the base
        # facet derives it from ``scales``.)
        layout['SCALE_X'] = layout['COL'] if self.free['x'] else 1
        layout['SCALE_Y'] = layout['ROW'] if self.free['y'] else 1

        self.nrow = layout['ROW'].max()
        self.ncol = layout['COL'].max()
        return layout

    def map(self, data, layout):
        """
        Assign each row of the data to a panel

        Parameters
        ----------
        data : dataframe
            Data for a layer. If it is empty, the ``PANEL`` column
            is added to it in place.
        layout : dataframe
            Layout table; its ``PANEL`` column must be categorical
            since its categories are reused for the data.

        Returns
        -------
        data : dataframe
            Data with an ordered categorical ``PANEL`` column,
            sorted by panel and with a fresh index.
        """
        if not len(data):
            data['PANEL'] = pd.Categorical(
                [],
                categories=layout['PANEL'].cat.categories,
                ordered=True)
            return data

        facet_vars = list(self.rows + self.cols)

        # Only the facetting variables that are columns of this data
        # can take part in the margins. Index.intersection is used
        # because the ``&`` operator no longer performs a set
        # intersection on a pandas Index.
        margin_vars = [list(data.columns.intersection(self.rows)),
                       list(data.columns.intersection(self.cols))]
        data = add_margins(data, margin_vars, self.margins)

        facet_vals = eval_facet_vars(
            data, facet_vars, self.plot.environment)
        data, facet_vals = add_missing_facets(
            data, layout, facet_vars, facet_vals)

        # assign each point to a panel
        if len(facet_vals) == 0:
            # Special case of no facetting
            data['PANEL'] = 1
        else:
            keys = join_keys(facet_vals, layout, facet_vars)
            data['PANEL'] = match(keys['x'], keys['y'], start=1)

        # mergesort is stable, rows keep their relative order
        # within a panel
        data = data.sort_values('PANEL', kind='mergesort')

        # matching dtype and
        # the categories(panel numbers) for the data should be in the
        # same order as the panels. i.e the panels are the reference,
        # they "know" the right order
        data['PANEL'] = pd.Categorical(
            data['PANEL'],
            categories=layout['PANEL'].cat.categories,
            ordered=True)

        data.reset_index(drop=True, inplace=True)
        return data

    def set_breaks_and_labels(self, ranges, layout_info, pidx):
        """
        Set the breaks and labels of a panel, then hide inner ticks

        After the base class has done its work, x ticks are kept only
        on the bottom row and y ticks only on the leftmost column.

        Parameters
        ----------
        ranges : object
            Passed through to ``facet.set_breaks_and_labels``.
        layout_info : dict-like
            Layout information. Row from the `layout` table.
        pidx : int
            Index of the panel's axes in ``self.axs``.
        """
        ax = self.axs[pidx]
        facet.set_breaks_and_labels(
            self, ranges, layout_info, pidx)

        bottomrow = layout_info['ROW'] == self.nrow
        leftcol = layout_info['COL'] == 1

        if bottomrow:
            ax.xaxis.set_ticks_position('bottom')
        else:
            ax.xaxis.set_ticks_position('none')
            ax.xaxis.set_ticklabels([])

        if leftcol:
            ax.yaxis.set_ticks_position('left')
        else:
            ax.yaxis.set_ticks_position('none')
            ax.yaxis.set_ticklabels([])

    def spaceout_and_resize_panels(self):
        """
        Adjust the spacing between the panels and resize them
        to meet the aspect ratio
        """
        ncol = self.ncol
        nrow = self.nrow
        figure = self.figure
        theme = self.theme
        get_property = theme.themeables.property

        left = figure.subplotpars.left
        right = figure.subplotpars.right
        top = figure.subplotpars.top
        bottom = figure.subplotpars.bottom
        W, H = figure.get_size_inches()

        # Themeables that are not set fall back to a spacing of 0.1
        try:
            spacing_x = get_property('panel_spacing_x')
        except KeyError:
            spacing_x = 0.1

        try:
            spacing_y = get_property('panel_spacing_y')
        except KeyError:
            spacing_y = 0.1

        try:
            aspect_ratio = get_property('aspect_ratio')
        except KeyError:
            # If the panels have different limits the coordinates
            # cannot compute a common aspect ratio
            if not self.free['x'] and not self.free['y']:
                aspect_ratio = self.coordinates.aspect(
                    self.layout.panel_params[0])
            else:
                aspect_ratio = None

        # The goal is to have equal spacing along the vertical
        # and the horizontal. We use the wspace and compute
        # the appropriate hspace. It would be a lot easier if
        # MPL had a better layout manager.

        # width of axes and height of axes
        w = ((right-left)*W - spacing_x*(ncol-1)) / ncol
        h = ((top-bottom)*H - spacing_y*(nrow-1)) / nrow

        # aspect ratio changes the size of the figure
        if aspect_ratio is not None:
            h = w*aspect_ratio
            H = (h*nrow + spacing_y*(nrow-1)) / (top-bottom)
            figure.set_figheight(H)

        # spacing, expressed as a fraction of the axes size
        wspace = spacing_x/w
        hspace = spacing_y/h
        figure.subplots_adjust(wspace=wspace, hspace=hspace)

    def draw_label(self, layout_info, ax):
        """
        Draw facet label onto the axes.

        This function will only draw labels if they are needed.
        Column labels go on top of the panels in the first row and
        row labels go to the right of the panels in the last column.

        Parameters
        ----------
        layout_info : dict-like
            Layout information. Row from the `layout` table.
        ax : axes
            Axes to label
        """
        toprow = layout_info['ROW'] == 1
        rightcol = layout_info['COL'] == self.ncol

        if toprow and len(self.cols):
            label_info = layout_info[list(self.cols)]
            # Tell the labeller which dimension it is labelling
            label_info._meta = {'dimension': 'cols'}
            label_info = self.labeller(label_info)
            self.draw_strip_text(label_info, 'top', ax)

        if rightcol and len(self.rows):
            label_info = layout_info[list(self.rows)]
            label_info._meta = {'dimension': 'rows'}
            label_info = self.labeller(label_info)
            self.draw_strip_text(label_info, 'right', ax)
def parse_grid_facets(facets):
    """
    Return two lists of facetting variables, for the rows & columns

    Parameters
    ----------
    facets : str | tuple | list
        Either a formula such as ``'var1 + var2 ~ var3'`` where
        a ``.`` stands for "no variable" on that side, or a
        sequence of exactly two elements (rows, columns). Each
        element of the sequence is a string (``'.'`` meaning no
        variable) or an iterable of variable names.

    Returns
    -------
    rows : list
        Variables that define the rows.
    cols : list
        Variables that define the columns.

    Raises
    ------
    PlotnineError
        If the sequence does not have exactly two elements, if one
        of its elements is neither a string nor an iterable, if
        ``facets`` is neither a string nor a tuple/list, or if the
        string is not a valid formula.
    """
    valid_forms = ['var1 ~ .', 'var1 ~ var2', '. ~ var1',
                   'var1 + var2 ~ var3 + var4',
                   "('var1', '.')", "('var1', 'var2')",
                   "('.', 'var1')", "((var1, var2), (var3, var4))",
                   ]
    error_msg_f = ("Valid formula for 'facet_grid' look like"
                   " {}".format(valid_forms))

    valid_seqs = ["('var1', '.')", "('var1', 'var2')",
                  "('.', 'var1')", "((var1, var2), (var3, var4))"]
    error_msg_s = ("Valid sequences for specifying 'facets' look like"
                   " {}".format(valid_seqs))

    if isinstance(facets, (tuple, list)):
        if len(facets) != 2:
            raise PlotnineError(error_msg_s)

        sides = []
        for spec in facets:
            if isinstance(spec, six.string_types):
                sides.append([] if spec == '.' else [spec])
                continue
            # Always hand back real lists: callers concatenate the
            # two results and use them to select dataframe columns,
            # neither of which works with a tuple.
            try:
                sides.append(list(spec))
            except TypeError:
                raise PlotnineError(error_msg_s)

        rows, cols = sides
        return rows, cols

    if not isinstance(facets, six.string_types):
        raise PlotnineError(error_msg_f)

    # One side of the formula: names joined by '+', or a lone dot
    variables_pattern = r'(\w+(?:\s*\+\s*\w+)*|\.)'
    pattern = r'\s*{0}\s*~\s*{0}\s*'.format(variables_pattern)
    formula = re.match(pattern, facets)

    if not formula:
        # The input is a string, so report the valid formulas
        raise PlotnineError(error_msg_f)

    lhs = formula.group(1)
    rhs = formula.group(2)

    rows = [] if lhs == '.' else [var.strip() for var in lhs.split('+')]
    cols = [] if rhs == '.' else [var.strip() for var in rhs.split('+')]
    return rows, cols