-
-
Notifications
You must be signed in to change notification settings - Fork 471
/
perspective.py
498 lines (421 loc) · 17.7 KB
/
perspective.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
from __future__ import annotations
import datetime as dt
import sys
from enum import Enum
from functools import partial
from typing import (
TYPE_CHECKING, Callable, ClassVar, List, Mapping, Optional, Type,
)
import numpy as np
import param
from bokeh.models import ColumnDataSource, ImportedStyleSheet
from pyviz_comms import JupyterComm
from ..io.resources import CDN_DIST
from ..io.state import state
from ..reactive import ReactiveData
from ..util import datetime_types, lazy_load
from ..viewable import Viewable
from .base import ModelPane
if TYPE_CHECKING:
from bokeh.document import Document
from bokeh.model import Model
from pyviz_comms import Comm
from ..model.perspective import PerspectiveClickEvent
# Default theme name (the same value `Perspective.theme` defaults to).
DEFAULT_THEME = "material"

# Theme names offered as choices for the `theme` parameter of `Perspective`.
THEMES = [
    "material",
    "material-dark",
    "monokai",
    "solarized",
    "solarized-dark",
    "vaporwave",
]
class Plugin(Enum):
    """
    Enumeration of the Perspective plugins (grids and charts).

    The value of each member is the plugin name; the values are the
    choices offered by the `plugin` parameter of `Perspective`.
    Members sharing a value with an earlier member (`CANDLESTICK_D3`,
    `OHLC_D3`) are enum aliases of that earlier member.
    """

    # Grid plugins
    HYPERGRID = "hypergrid"
    GRID = "datagrid"

    # Chart plugins (d3fc)
    YBAR_D3 = "d3_y_bar"
    XBAR_D3 = "d3_x_bar"
    XYLINE_D3 = "d3_xy_line"
    YLINE_D3 = "d3_y_line"
    YAREA_D3 = "d3_y_area"
    YSCATTER_D3 = "d3_y_scatter"
    XYSCATTER_D3 = "d3_xy_scatter"
    TREEMAP_D3 = "d3_treemap"
    SUNBURST_D3 = "d3_sunburst"
    HEATMAP_D3 = "d3_heatmap"
    CANDLESTICK = "d3_candlestick"
    CANDLESTICK_D3 = "d3_candlestick"  # noqa: PIE796 (alias of CANDLESTICK)
    OHLC = "d3_ohlc"
    OHLC_D3 = "d3_ohlc"  # noqa: PIE796 (alias of OHLC)

    @staticmethod
    def options():
        """
        List the plugin names that can be selected.

        Returns
        -------
        options: list
          The value of every member in definition order. Iterating an
          Enum skips aliases, so each name appears exactly once.
        """
        return [member.value for member in Plugin]
def deconstruct_pandas(data, kwargs=None):
    """
    Flatten a pandas object and record the pivots implied by its indexes.

    Index levels (and, for pivoted frames, column levels) are moved into
    regular columns, and the names of the flattened levels are reported
    so that they can be re-applied as pivots.

    This code was copied from the Perspective repository and is
    reproduced under Apache 2.0 license. See the original at:

    https://github.com/finos/perspective/blob/master/python/perspective/perspective/core/data/pd.py

    Arguments
    ---------
    data: pandas.DataFrame or pandas.Series
      The pandas object to flatten. The caller's object is left
      untouched; a shallow copy is taken before any label or dtype
      rewrite.
    kwargs: dict or None
      Ignored. The returned dictionary is always built from scratch;
      the parameter is kept only for signature compatibility.

    Returns
    -------
    data: pandas.DataFrame
      A flattened version of the input
    kwargs: dict
      A dictionary containing the members `columns`, `group_by`,
      and `split_by` (each a list).
    """
    import pandas as pd

    # The incoming `kwargs` has always been discarded, so start clean.
    kwargs = {"columns": [], "group_by": [], "split_by": []}

    # Shallow copy: the index, column-label and categorical rewrites below
    # rebind labels/columns on the copy only, so nothing leaks into the
    # object owned by the caller and no data is duplicated.
    data = data.copy(deep=False)

    if isinstance(data.index, pd.PeriodIndex):
        data.index = data.index.to_timestamp()

    if isinstance(data, pd.DataFrame) and hasattr(pd, "CategoricalDtype"):
        for k, v in data.dtypes.items():
            if isinstance(v, pd.CategoricalDtype):
                data[k] = data[k].astype(str)

    if (
        isinstance(data, pd.DataFrame)
        and isinstance(data.columns, pd.MultiIndex)
        and isinstance(data.index, pd.MultiIndex)
    ):
        # Row and col pivots
        kwargs["group_by"].extend([str(c) for c in data.index.names])

        # Two strategies
        if None in data.columns.names:
            # In this case, we need to extract the column names from the row
            # e.g. pt = pd.pivot_table(df, values = ['Discount','Sales'], index=['Country','Region'], columns=["State","Quantity"])
            # Table will be
            #                       Discount            Sales
            # State             Alabama Alaska ...  Alabama Alaska ...
            # Quantity            150    350   ...    300    500
            # Country  Region
            #   US     Region 0    ...
            #   US     Region 1
            #
            # We need to transform this to:
            # group_by = ['Country', 'Region']
            # split_by = ['State', 'Quantity']
            # columns = ['Discount', 'Sales']
            existent = kwargs["group_by"] + data.columns.names
            for c in data.columns.names:
                if c is not None:
                    kwargs["split_by"].append(c)
                    data = data.stack()
            data = pd.DataFrame(data).reset_index()

            for new_column in data.columns:
                if new_column not in existent:
                    kwargs["columns"].append(new_column)
        else:
            # In this case, we have no need as the values is just a single entry
            # e.g. pt = pd.pivot_table(df, values = 'Discount', index=['Country','Region'], columns = ['Category', 'Segment'])
            for _ in kwargs["group_by"]:
                # unstack row pivots
                data = data.unstack()
            data = pd.DataFrame(data)

        # This rather weird loop maps unnamed index levels onto generated
        # names, e.g. in the `else` block above, to reconstruct the
        # "Discount" name. It is unclear whether the name is stored or
        # lost, so the levels are simply called 'index', 'index-1', ...
        i = 0
        new_names = list(data.index.names)
        for j, val in enumerate(data.index.names):
            if val is None:
                new_names[j] = "index" if i == 0 else f"index-{i}"
                i += 1
            elif str(val) not in kwargs["group_by"]:
                kwargs["split_by"].append(str(val))

        # Finally, remap any values columns to have column name 'value'
        data.index.names = new_names
        data = data.reset_index()  # copy
        known = (
            ["index"]
            + kwargs["group_by"]
            + kwargs["split_by"]
            + kwargs["columns"]
        )
        data.columns = [str(c) if c in known else "value" for c in data.columns]
        kwargs["columns"].extend(
            ["value" for c in data.columns if c not in known]
        )

    elif isinstance(data, pd.DataFrame) and isinstance(data.columns, pd.MultiIndex):
        # Col pivots
        if data.index.name:
            kwargs["group_by"].append(str(data.index.name))
            push_row_pivot = False
        else:
            push_row_pivot = True

        data = pd.DataFrame(data.unstack())

        i = 0
        new_names = list(data.index.names)
        for j, val in enumerate(data.index.names):
            if val is None:
                new_names[j] = "index" if i == 0 else f"index-{i}"
                i += 1
                if push_row_pivot:
                    kwargs["group_by"].append(str(new_names[j]))
            elif str(val) not in kwargs["group_by"]:
                kwargs["split_by"].append(str(val))

        data.index.names = new_names
        known = ["index"] + kwargs["group_by"] + kwargs["split_by"]
        data.columns = [str(c) if c in known else "value" for c in data.columns]
        kwargs["columns"].extend(
            ["value" for c in data.columns if c not in known]
        )

    elif isinstance(data, pd.DataFrame) and isinstance(data.index, pd.MultiIndex):
        # Row pivots
        kwargs["group_by"].extend(list(data.index.names))
        data = data.reset_index()  # copy

    if isinstance(data, pd.DataFrame):
        # flat df
        if "index" not in [str(c).lower() for c in data.columns]:
            data = data.reset_index(col_fill="index")

        if not kwargs["columns"]:
            # might already be set in row+col pivot df
            kwargs["columns"].extend([str(c) for c in data.columns])
            data.columns = kwargs["columns"]

    if isinstance(data, pd.Series):
        # Series
        flattened = data.reset_index()  # copy

        # preserve name from series
        flattened.name = data.name

        # make sure all columns are strings
        flattened.columns = [str(c) for c in flattened.columns]

        data = flattened

    return data, kwargs
class Perspective(ModelPane, ReactiveData):
    """
    The `Perspective` pane provides an interactive visualization component for
    large, real-time datasets built on the Perspective project.

    Reference: https://panel.holoviz.org/reference/panes/Perspective.html

    :Example:

    >>> Perspective(df, plugin='hypergrid', theme='material-dark')
    """

    aggregates = param.Dict(default=None, nested_refs=True, doc="""
        How to aggregate. For example {"x": "distinct count"}""")

    columns = param.List(default=None, nested_refs=True, doc="""
        A list of source columns to show as columns. For example ["x", "y"]""")

    editable = param.Boolean(default=True, allow_None=True, doc="""
        Whether items are editable.""")

    expressions = param.List(default=None, nested_refs=True, doc="""
        A list of expressions computing new columns from existing columns.
        For example [""x"+"index""]""")

    split_by = param.List(default=None, nested_refs=True, doc="""
        A list of source columns to pivot by. For example ["x", "y"]""")

    filters = param.List(default=None, nested_refs=True, doc="""
        How to filter. For example [["x", "<", 3],["y", "contains", "abc"]]""")

    min_width = param.Integer(default=420, bounds=(0, None), doc="""
        Minimal width of the component (in pixels) if width is adjustable.""")

    object = param.Parameter(doc="""
        The plot data declared as a dictionary of arrays or a DataFrame.""")

    group_by = param.List(default=None, doc="""
        A list of source columns to group by. For example ["x", "y"]""")

    selectable = param.Boolean(default=True, allow_None=True, doc="""
        Whether items are selectable.""")

    sort = param.List(default=None, doc="""
        How to sort. For example[["x","desc"]]""")

    plugin = param.ObjectSelector(default=Plugin.GRID.value, objects=Plugin.options(), doc="""
        The name of a plugin to display the data. For example hypergrid or d3_xy_scatter.""")

    plugin_config = param.Dict(default={}, nested_refs=True, doc="""
        Configuration for the PerspectiveViewerPlugin.""")

    toggle_config = param.Boolean(default=True, doc="""
        Whether to show the config menu.""")

    theme = param.ObjectSelector(default='material', objects=THEMES, doc="""
        The style of the PerspectiveViewer. For example material-dark""")

    priority: ClassVar[float | bool | None] = None

    # Resolved lazily in `_get_model`
    _bokeh_model: ClassVar[Type[Model] | None] = None

    _data_params: ClassVar[List[str]] = ['object']

    _rename: ClassVar[Mapping[str, str | None]] = {
        'selection': None
    }

    _updates: ClassVar[bool] = True

    _stylesheets: ClassVar[List[str]] = [
        f'{CDN_DIST}css/perspective-datatable.css'
    ]

    @classmethod
    def applies(cls, object):
        """
        Report whether this pane can render `object`.

        Returns 0 for a non-empty dictionary whose values are all lists
        or NumPy arrays and for a pandas DataFrame, None for an empty
        dictionary and False for anything else. pandas is only consulted
        if it has already been imported.
        """
        if isinstance(object, dict) and all(isinstance(v, (list, np.ndarray)) for v in object.values()):
            return 0 if object else None
        elif 'pandas' in sys.modules:
            import pandas as pd
            if isinstance(object, pd.DataFrame):
                return 0
        return False

    def __init__(self, object=None, **params):
        """
        Create the pane.

        An optional `on_click` keyword is removed from `params` and, if
        truthy, registered through `on_click` once the pane has been
        initialized.
        """
        click_handler = params.pop('on_click', None)
        # Must exist before the parent initializer runs
        self._on_click_callbacks = []
        super().__init__(object, **params)
        if click_handler:
            self.on_click(click_handler)

    def _get_data(self):
        """
        Convert `self.object` into the data sent to the model.

        Returns
        -------
        A tuple of the (flattened) source object and a dictionary
        mapping stringified column names to the column values;
        `({}, {})` if no object is set.

        Raises
        ------
        ValueError
          If the number of distinct column names (after `_as_digit`)
          differs from the number of columns.
        """
        if self.object is None:
            return {}, {}
        if isinstance(self.object, dict):
            ncols = len(self.object)
            df = data = self.object
        else:
            df, kwargs = deconstruct_pandas(self.object)
            ncols = len(df.columns)
            data = {col: df[col].values for col in df.columns}
            if kwargs:
                # Only fill in the pivots the user has not set explicitly
                self.param.update(**{
                    k: v for k, v in kwargs.items()
                    if getattr(self, k) is None
                })
        cols = {self._as_digit(c) for c in df}
        if len(cols) != ncols:
            raise ValueError("Integer columns must be unique when "
                             "converted to strings.")
        return df, {str(k): v for k, v in data.items()}

    def _filter_properties(self, properties):
        """Drop every property that is also a parameter of `Viewable`."""
        ignored = list(Viewable.param)
        return [p for p in properties if p not in ignored]

    def _get_properties(self, doc, source=None):
        """
        Assemble the model properties, including data source and schema.

        Arguments
        ---------
        doc: the document passed on to the parent implementation
        source: an existing ColumnDataSource to refresh with the
          current data; a new one is created when None.

        Returns
        -------
        The property dictionary with `height`, `source` and `schema`
        filled in and `object` removed. `schema` maps each column of the
        source to one of 'datetime', 'date', 'integer', 'boolean',
        'float' or 'string'.
        """
        props = super()._get_properties(doc)
        del props['object']
        if props.get('toggle_config'):
            props['height'] = self.height or 300
        else:
            props['height'] = self.height or 150
        if source is None:
            source = ColumnDataSource(data=self._data)
        else:
            source.data = self._data
        props['source'] = source
        props['schema'] = schema = {}
        for col, array in source.data.items():
            if not isinstance(array, np.ndarray):
                array = np.asarray(array)
            kind = array.dtype.kind
            if kind == 'M':
                schema[col] = 'datetime'
            elif kind in 'ui':
                schema[col] = 'integer'
            elif kind == 'b':
                schema[col] = 'boolean'
            elif kind == 'f':
                schema[col] = 'float'
            elif kind in 'sU':
                schema[col] = 'string'
            elif len(array):
                # Any other dtype: infer the type from the first value
                value = array[0]
                # `datetime.datetime` subclasses `datetime.date`, so it
                # has to be excluded here or every datetime object would
                # be declared a plain date.
                if isinstance(value, dt.date) and not isinstance(value, dt.datetime):
                    schema[col] = 'date'
                elif isinstance(value, dt.datetime) or isinstance(value, datetime_types):
                    schema[col] = 'datetime'
                elif isinstance(value, str):
                    schema[col] = 'string'
                elif isinstance(value, (float, np.floating)):
                    schema[col] = 'float'
                elif isinstance(value, (int, np.integer)):
                    schema[col] = 'integer'
                else:
                    schema[col] = 'string'
            else:
                schema[col] = 'string'
        return props

    def _get_theme(self, theme, resources=None):
        """
        Build the stylesheet URL for `theme` and return it.

        If the bokeh model has been loaded, its `__css_raw__` list is
        replaced by its first three entries followed by the theme URL.
        `resources` is not used.
        """
        from ..models.perspective import THEME_URL
        theme_url = f'{THEME_URL}{theme}.css'
        if self._bokeh_model is not None:
            self._bokeh_model.__css_raw__ = self._bokeh_model.__css_raw__[:3] + [theme_url]
        return theme_url

    def _process_param_change(self, params):
        """
        Translate parameter changes into model properties.

        When the stylesheets or the theme change, the stylesheets from
        the model's `__css__` are placed in front of the user supplied
        ones. Column names in `columns`, `group_by`, `split_by`, `sort`,
        `filters` and `aggregates` are converted to strings (None
        entries of the first three are kept).
        """
        if 'stylesheets' in params or 'theme' in params:
            self._get_theme(params.get('theme', self.theme))
            css = getattr(self._bokeh_model, '__css__', [])
            params['stylesheets'] = [
                ImportedStyleSheet(url=ss) for ss in css
            ] + params.get('stylesheets', self.stylesheets)
        props = super()._process_param_change(params)
        for p in ('columns', 'group_by', 'split_by'):
            if props.get(p):
                props[p] = [None if col is None else str(col) for col in props[p]]
        if props.get('sort'):
            props['sort'] = [[str(col), *args] for col, *args in props['sort']]
        if props.get('filters'):
            props['filters'] = [[str(col), *args] for col, *args in props['filters']]
        if props.get('aggregates'):
            props['aggregates'] = {str(col): agg for col, agg in props['aggregates'].items()}
        return props

    def _as_digit(self, col):
        """
        Map a stringified column name back to its integer form.

        Returns `int(col)` when `col` is a string of digits that is not
        itself a key of `self._processed` while its integer value is;
        otherwise `col` is returned unchanged. Non-string names (for
        example integer keys) are never converted.
        """
        # NOTE(review): `_processed` is defined outside this file view;
        # presumably it holds the first element returned by `_get_data`.
        if self._processed is None or col in self._processed or col is None:
            return col
        elif isinstance(col, str) and col.isdigit() and int(col) in self._processed:
            return int(col)
        return col

    def _process_property_change(self, msg):
        """
        Translate property changes coming from the model into parameters.

        Column names in `columns`, `group_by`, `split_by`, `sort`,
        `filters` and `aggregates` are passed through `_as_digit`.
        """
        msg = super()._process_property_change(msg)
        for prop in ('columns', 'group_by', 'split_by'):
            if prop not in msg:
                continue
            msg[prop] = [self._as_digit(col) for col in msg[prop]]
        if msg.get('sort'):
            msg['sort'] = [[self._as_digit(col), *args] for col, *args in msg['sort']]
        if msg.get('filters'):
            msg['filters'] = [[self._as_digit(col), *args] for col, *args in msg['filters']]
        if msg.get('aggregates'):
            msg['aggregates'] = {self._as_digit(col): agg for col, agg in msg['aggregates'].items()}
        return msg

    def _get_model(
        self, doc: Document, root: Optional[Model] = None,
        parent: Optional[Model] = None, comm: Optional[Comm] = None
    ) -> Model:
        """
        Lazily load the Perspective bokeh model, create the model through
        the parent implementation and register for 'perspective-click'
        events on it.
        """
        self._bokeh_model = lazy_load(
            'panel.models.perspective', 'Perspective', isinstance(comm, JupyterComm), root
        )
        model = super()._get_model(doc, root, parent, comm)
        self._register_events('perspective-click', model=model, doc=doc, comm=comm)
        return model

    def _update(self, ref: str, model: Model) -> None:
        """Refresh `model` with the current properties, reusing its data source."""
        model.update(**self._get_properties(model.document, source=model.source))

    def _process_event(self, event):
        """Run every registered click callback for a 'perspective-click' event."""
        if event.event_name == 'perspective-click':
            for cb in self._on_click_callbacks:
                state.execute(partial(cb, event), schedule=False)

    def on_click(self, callback: Callable[[PerspectiveClickEvent], None]):
        """
        Register a callback to be executed when any row is clicked.
        The callback is given a PerspectiveClickEvent declaring the
        config, column names, and row values of the row that was
        clicked.

        Arguments
        ---------
        callback: (callable)
            The callback to run on click events.
        """
        self._on_click_callbacks.append(callback)