/
qplot.py
215 lines (182 loc) · 6.4 KB
/
qplot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
from contextlib import suppress
from warnings import warn

import pandas as pd
import pandas.api.types as pdtypes
import numpy as np
from patsy.eval import EvalEnvironment

from .ggplot import ggplot
from .aes import aes, all_aesthetics, scaled_aesthetics
from .labels import labs
from .facets import facet_null, facet_grid, facet_wrap
from .facets.facet_grid import parse_grid_facets
from .facets.facet_wrap import parse_wrap_facets
from .utils import Registry, is_string, array_kind
from .exceptions import PlotnineError, PlotnineWarning
from .scales import scale_x_log10, scale_y_log10, lims
from .themes import theme
def qplot(x=None, y=None, data=None, facets=None, margins=False,
          geom='auto', xlim=None, ylim=None, log='', main=None,
          xlab=None, ylab=None, asp=None, **kwargs):
    """
    Quick plot

    Parameters
    ----------
    x : str | array_like
        x aesthetic
    y : str | array_like
        y aesthetic
    data : dataframe
        Data frame to use (optional). If not specified,
        will create one, extracting arrays from the
        current environment.
    facets : object
        Faceting specification (optional). It is first tried as a
        ``facet_grid`` specification and then as a ``facet_wrap``
        specification. If neither can parse it, a
        ``PlotnineWarning`` is issued and the plot is not faceted.
    margins : bool
        Passed on to ``facet_grid`` when ``facets`` describes a
        grid. Not used otherwise.
    geom : str | list
        *geom(s)* to do the drawing. If ``auto``, defaults
        to 'point' if ``x`` and ``y`` are specified or
        'histogram' if only ``x`` is specified ('bar' when ``x``
        is discrete, 'qq' when a ``sample`` aesthetic is mapped).
    xlim : tuple
        x-axis limits
    ylim : tuple
        y-axis limits
    log : str in ``{'x', 'y', 'xy'}``
        Which variables to log transform.
    main : str
        Plot title
    xlab : str
        x-axis label
    ylab : str
        y-axis label
    asp : str | float
        The y/x aspect ratio.
    **kwargs : dict
        Arguments passed on to the geom.

    Returns
    -------
    p : ggplot
        ggplot object

    Raises
    ------
    PlotnineError
        If the geom has to be chosen automatically and either the
        string given for ``x`` cannot be evaluated, or ``x`` is
        missing and its length cannot be inferred from ``y``.
    """
    # The environment has to be captured right here, in the body of
    # qplot itself, so that depth 1 refers to the caller of qplot.
    environment = EvalEnvironment.capture(1)

    # Extract all recognizable aesthetic mappings from the parameters
    # String values e.g "I('red')", "I(4)" are not treated as mappings
    aesthetics = {} if x is None else {'x': x}
    if y is not None:
        aesthetics['y'] = y

    def is_mapping(value):
        """
        Return True if value is not enclosed in I() function
        """
        # Values without string methods (arrays, numbers, ...) raise
        # AttributeError and are therefore treated as mappings
        with suppress(AttributeError):
            return not (value.startswith('I(') and value.endswith(')'))
        return True

    def I(value):
        # Identity function; evaluating "I(<expr>)" yields <expr>
        return value

    I_env = EvalEnvironment([{'I': I}])

    for ae in kwargs.keys() & all_aesthetics:
        value = kwargs[ae]
        if is_mapping(value):
            aesthetics[ae] = value
        else:
            # "I(...)" string: unwrap it into a literal geom parameter
            kwargs[ae] = I_env.eval(value)

    # List of geoms. Always work on a private list; replace_auto
    # modifies it in place and the caller's sequence must not change.
    geom = [geom] if is_string(geom) else list(geom)

    if data is None:
        data = pd.DataFrame()

    # Work out plot data, and modify aesthetics, if necessary
    def replace_auto(lst, str2):
        """
        Replace all occurrences of 'auto' in lst with str2
        """
        for i, value in enumerate(lst):
            if value == 'auto':
                lst[i] = str2
        return lst

    if 'auto' in geom:
        if 'sample' in aesthetics:
            replace_auto(geom, 'qq')
        elif y is None:
            # If x is discrete we choose geom_bar &
            # geom_histogram otherwise. But we need to
            # evaluate the mapping to find out the dtype
            env = environment.with_outer_namespace(
                {'factor': pd.Categorical})

            if isinstance(aesthetics['x'], str):
                try:
                    x = env.eval(aesthetics['x'], inner_namespace=data)
                except Exception as err:
                    msg = "Could not evaluate aesthetic 'x={}'"
                    # Keep the underlying failure attached as the cause
                    raise PlotnineError(
                        msg.format(aesthetics['x'])) from err
            elif not hasattr(aesthetics['x'], 'dtype'):
                x = np.asarray(aesthetics['x'])

            if array_kind.discrete(x):
                replace_auto(geom, 'bar')
            else:
                replace_auto(geom, 'histogram')
        else:
            if x is None:
                if pdtypes.is_list_like(aesthetics['y']):
                    # Plot y against its positional index
                    aesthetics['x'] = range(len(aesthetics['y']))
                    xlab = 'range(len(y))'
                    ylab = 'y'
                else:
                    # This could be solved when the layer computes
                    # its aesthetics, but it is not worth the extra
                    # complexity
                    raise PlotnineError(
                        "Cannot infer how long x should be.")
            replace_auto(geom, 'point')

    p = ggplot(aes(**aesthetics), data=data, environment=environment)

    def get_facet_type(facets):
        """
        Return 'grid', 'wrap' or 'null' depending on which parser
        accepts the facets specification
        """
        with suppress(PlotnineError):
            parse_grid_facets(facets)
            return 'grid'

        with suppress(PlotnineError):
            parse_wrap_facets(facets)
            return 'wrap'

        warn("Could not determine the type of faceting, "
             "therefore no faceting.", PlotnineWarning)
        return 'null'

    if facets:
        facet_type = get_facet_type(facets)
        if facet_type == 'grid':
            p += facet_grid(facets, margins=margins)
        elif facet_type == 'wrap':
            p += facet_wrap(facets)
        else:
            p += facet_null()

    # Add geoms
    for g in geom:
        geom_name = 'geom_{}'.format(g)
        geom_klass = Registry[geom_name]
        stat_name = 'stat_{}'.format(geom_klass.DEFAULT_PARAMS['stat'])
        stat_klass = Registry[stat_name]
        # Only hand over the keyword arguments that the geom or its
        # default stat knows about, and that are not already mapped
        recognized = (kwargs.keys() &
                      (geom_klass.DEFAULT_PARAMS.keys() |
                       geom_klass.aesthetics() |
                       stat_klass.DEFAULT_PARAMS.keys() |
                       stat_klass.aesthetics()))
        recognized = recognized - aesthetics.keys()
        params = {ae: kwargs[ae] for ae in recognized}
        p += geom_klass(**params)

    # pd.Series objects have name attributes. In a dataframe, the
    # series have the name of the column.
    labels = {}
    for ae in scaled_aesthetics & kwargs.keys():
        with suppress(AttributeError):
            labels[ae] = kwargs[ae].name

    with suppress(AttributeError):
        labels['x'] = xlab if xlab is not None else x.name

    with suppress(AttributeError):
        labels['y'] = ylab if ylab is not None else y.name

    if main is not None:
        labels['title'] = main

    # Axis transformations and limits. When an axis is log
    # transformed the limits are given to the log scale itself;
    # adding a separate limits scale for the same axis would
    # compete with the log scale.
    if 'x' in log:
        if xlim is None:
            p += scale_x_log10()
        else:
            p += scale_x_log10(limits=xlim)
    elif xlim is not None:
        p += lims(x=xlim)

    if 'y' in log:
        if ylim is None:
            p += scale_y_log10()
        else:
            p += scale_y_log10(limits=ylim)
    elif ylim is not None:
        p += lims(y=ylim)

    if labels:
        p += labs(**labels)

    if asp:
        p += theme(aspect_ratio=asp)

    return p