Skip to content

Commit

Permalink
Added inspect_points datashader operation (#4794)
Browse files Browse the repository at this point in the history
  • Loading branch information
jlstevens committed Jan 27, 2021
1 parent f82f2be commit 4cd9fb1
Show file tree
Hide file tree
Showing 7 changed files with 280 additions and 19 deletions.
18 changes: 10 additions & 8 deletions holoviews/core/data/__init__.py
Expand Up @@ -905,6 +905,11 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
transformed = transformed.collapse()
return transformed.clone(new_type=type(self))

ndims = len(dimensions)
min_d, max_d = self.param.objects('existing')['kdims'].bounds
generic_type = (min_d is not None and ndims < min_d) or (max_d is not None and ndims > max_d)
new_type = Dataset if generic_type else None

# Handle functions
kdims = [self.get_dimension(d, strict=True) for d in dimensions]
if not self:
Expand All @@ -913,17 +918,15 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
vdims = [d for vd in self.vdims for d in [vd, vd.clone('_'.join([vd.name, spread_name]))]]
else:
vdims = self.vdims
return self.clone([], kdims=kdims, vdims=vdims)
if not kdims and len(vdims) == 1:
return np.nan
return self.clone([], kdims=kdims, vdims=vdims, new_type=new_type)

vdims = self.vdims
aggregated, dropped = self.interface.aggregate(self, kdims, function, **kwargs)
aggregated = self.interface.unpack_scalar(self, aggregated)
vdims = [vd for vd in vdims if vd not in dropped]

ndims = len(dimensions)
min_d, max_d = self.param.objects('existing')['kdims'].bounds
generic_type = (min_d is not None and ndims < min_d) or (max_d is not None and ndims > max_d)

if spreadfn:
error, _ = self.interface.aggregate(self, dimensions, spreadfn)
spread_name = spreadfn.__name__
Expand All @@ -936,7 +939,7 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
idx = vdims.index(d)
combined = combined.add_dimension(dim, idx+1, dvals, True)
vdims = combined.vdims
return combined.clone(new_type=Dataset if generic_type else type(self))
return combined.clone(new_type=new_type)

if np.isscalar(aggregated):
return aggregated
Expand All @@ -947,8 +950,7 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
except:
datatype = self.param.objects('existing')['datatype'].default
return self.clone(aggregated, kdims=kdims, vdims=vdims,
new_type=Dataset if generic_type else None,
datatype=datatype)
new_type=new_type, datatype=datatype)


def groupby(self, dimensions=[], container_type=HoloMap, group_type=None,
Expand Down
12 changes: 12 additions & 0 deletions holoviews/core/data/spatialpandas.py
Expand Up @@ -428,6 +428,18 @@ def split(cls, dataset, start, end, datatype, **kwargs):
objs.append(obj)
return objs

@classmethod
def dframe(cls, dataset, dimensions):
    """
    Return the dataset's underlying data as a dataframe.

    When dimensions is non-empty only those columns are selected
    from dataset.data; otherwise a copy of the whole frame is
    returned.
    """
    frame = dataset.data
    if not dimensions:
        return frame.copy()
    return frame[dimensions]

@classmethod
def as_dframe(cls, dataset):
    """
    Return the object stored in dataset.data as-is (no copy is made).
    """
    frame = dataset.data
    return frame




def get_geom_type(gdf, col):
Expand Down
11 changes: 9 additions & 2 deletions holoviews/core/data/spatialpandas_dask.py
Expand Up @@ -52,7 +52,8 @@ def partition_values(cls, df, dataset, dimension, expanded, flat):
@classmethod
def values(cls, dataset, dimension, expanded=True, flat=True, compute=True, keep_index=False):
if compute and not keep_index:
meta = np.array([], dtype=cls.dtype(dataset, dimension))
dtype = cls.dtype(dataset, dimension)
meta = np.array([], dtype=dtype.base)
return dataset.data.map_partitions(
cls.partition_values, meta=meta, dataset=dataset,
dimension=dimension, expanded=expanded, flat=flat
Expand All @@ -79,6 +80,12 @@ def iloc(cls, dataset, index):
@classmethod
def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
    """
    Forward the call unchanged to add_dimension on cls.base_interface
    and return its result.
    """
    delegate = cls.base_interface
    return delegate.add_dimension(dataset, dimension, dim_pos, values, vdim)


@classmethod
def dframe(cls, dataset, dimensions):
    """
    Return the result of calling compute() on the dataset's data.

    When dimensions is non-empty only those columns are selected
    before computing; otherwise the whole of dataset.data is
    computed.
    """
    lazy = dataset.data[dimensions] if dimensions else dataset.data
    return lazy.compute()

Interface.register(DaskSpatialPandasInterface)
7 changes: 4 additions & 3 deletions holoviews/core/data/xarray.py
Expand Up @@ -411,9 +411,10 @@ def unpack_scalar(cls, dataset, data):
Given a dataset object and data in the appropriate format for
the interface, return a simple scalar.
"""
if (not cls.packed(dataset) and len(data.data_vars) == 1 and
len(data[dataset.vdims[0].name].shape) == 0):
return data[dataset.vdims[0].name].item()
if not cls.packed(dataset) and len(data.data_vars) == 1:
array = data[dataset.vdims[0].name].squeeze()
if len(array.shape) == 0:
return array.item()
return data


Expand Down
4 changes: 3 additions & 1 deletion holoviews/element/path.py
Expand Up @@ -56,7 +56,9 @@ class Path(SelectionPolyExpr, Geometry):

group = param.String(default="Path", constant=True)

datatype = param.ObjectSelector(default=['multitabular', 'spatialpandas'])
datatype = param.ObjectSelector(default=[
'multitabular', 'spatialpandas', 'dask_spatialpandas']
)

def __init__(self, data, kdims=None, vdims=None, **params):
if isinstance(data, tuple) and len(data) == 2:
Expand Down
216 changes: 212 additions & 4 deletions holoviews/operation/datashader.py
Expand Up @@ -33,7 +33,7 @@
QuadMesh, Contours, Spikes, Area, Rectangles,
Spread, Segments, Scatter, Points, Polygons)
from ..element.util import connect_tri_edges_pd
from ..streams import RangeXY, PlotSize
from ..streams import RangeXY, PlotSize, PointerXY

ds_version = LooseVersion(ds.__version__)

Expand Down Expand Up @@ -756,7 +756,7 @@ def _process(self, element, key=None):
df[y0d.name] = df[y0d.name].astype('datetime64[us]').astype('int64')
df[y1d.name] = df[y1d.name].astype('datetime64[us]').astype('int64')

if isinstance(agg_fn, ds.count_cat):
if isinstance(agg_fn, ds.count_cat) and df[agg_fn.column].dtype.name != 'category':
df[agg_fn.column] = df[agg_fn.column].astype('category')

params = self._get_agg_params(element, x0d, y0d, agg_fn, (x0, y0, x1, y1))
Expand Down Expand Up @@ -861,7 +861,7 @@ def _get_xarrays(self, element, coords, xtype, ytype):
for i, vd in enumerate(element.vdims):
if element.interface is XArrayInterface:
if element.interface.packed(element):
xarr = element.data[..., i]
xarr = element.data[..., i]
else:
xarr = element.data[vd.name]
if 'datetime' in (xtype, ytype):
Expand Down Expand Up @@ -1374,7 +1374,7 @@ def _process(self, element, key=None):
if self.p.precompute:
self._precomputed[element._plot_id] = (data, col)

if isinstance(agg_fn, ds.count_cat):
if isinstance(agg_fn, ds.count_cat) and data[agg_fn.column].dtype.name != 'category':
data[agg_fn.column] = data[agg_fn.column].astype('category')

if isinstance(element, Polygons):
Expand Down Expand Up @@ -1725,3 +1725,211 @@ class directly_connect_edges(_connect_edges, connect_edges):

def _bundle(self, position_df, edges_df):
return connect_edges.__call__(self, position_df, edges_df)


def identity(x):
    """
    Return the argument unchanged (used as the default, no-op
    transform callable).
    """
    return x


class inspect_mask(Operation):
    """
    Operation used to display the inspection mask, for use with other
    inspection operations. Can be used directly but is more commonly
    constructed using the mask property of the corresponding inspector
    operation.
    """

    pixels = param.Integer(default=3, doc="""
       Size of the mask that should match the pixels parameter used in
       the associated inspection operation.""")

    # Stream values and overrides
    streams = param.List(default=[PointerXY])
    x = param.Number(default=0)
    y = param.Number(default=0)

    @classmethod
    def _distance_args(cls, element, x_range, y_range, pixels):
        """
        Convert a size in pixels into x and y distances, using the
        gridded shape of the element and the supplied axis ranges.
        """
        rows, cols = element.interface.shape(element, gridded=True)
        x_step = abs(x_range[1] - x_range[0]) / cols
        y_step = abs(y_range[1] - y_range[0]) / rows
        return (x_step*pixels, y_step*pixels)

    def _process(self, raster, key=None):
        """
        Return a rectangular Polygons element centred on the current
        x/y parameter values and sized from the pixels parameter.
        """
        if isinstance(raster, RGB):
            # Keep only the last value dimension of an RGB
            # (presumably the alpha channel -- confirm against callers)
            raster = raster[..., raster.vdims[-1]]
        x_range = raster.range(0)
        y_range = raster.range(1)
        width, height = self._distance_args(raster, x_range, y_range, self.p.pixels)
        return self._indicator(raster.kdims, self.p.x, self.p.y, width, height)

    def _indicator(self, kdims, x, y, xdelta, ydelta):
        """
        Build the rectangle of width xdelta and height ydelta centred
        on (x, y) as a Polygons element.
        """
        half_w, half_h = xdelta/2, ydelta/2
        left, right = x-half_w, x+half_w
        bottom, top = y-half_h, y+half_h
        corners = np.array([(left, bottom), (right, bottom),
                            (right, top), (left, top)])
        key = (str(kdims[0]), str(kdims[1]))
        return Polygons({key: corners}, kdims=kdims)



class inspect_base(Operation):
    """
    Given datashaded aggregate (Image) output, return a set of
    (hoverable) points sampled from those near the cursor.

    Subclasses are expected to implement _element (which builds the
    returned element from the hits dataframe) and _sort_by_distance
    (which orders the hits by distance from the cursor).
    """

    pixels = param.Integer(default=3, doc="""
       Number of pixels in data space around the cursor point to search
       for hits in. The hit within this box mask that is closest to the
       cursor's position is displayed.""")

    null_value = param.Number(default=0, doc="""
       Value of raster which indicates no hits. For instance zero for
       count aggregator (default) and commonly NaN for other (float)
       aggregators. For RGBA images, the alpha channel is used which means
       zero alpha acts as the null value.""")

    value_bounds = param.NumericTuple(default=None, length=2, allow_None=True, doc="""
       If not None, a numeric bounds for the pixel under the cursor in
       order for hits to be computed. Useful for count aggregators where
       a value of (1,1000) would make sure no more than a thousand
       samples will be searched.""")

    hits = param.DataFrame(default=pd.DataFrame(), allow_None=True)

    max_indicators = param.Integer(default=1, doc="""
       Maximum number of indicator elements to display within the mask
       of size pixels. Points are prioritized by distance from the
       cursor point. This means that the default value of one shows the
       single closest sample to the cursor. Note that this limit is not
       applied to the hits parameter.""")

    transform = param.Callable(default=identity, doc="""
      Function that transforms the hits dataframe before it is passed to
      the Points element. Can be used to customize the value dimensions
      e.g. to implement custom hover behavior.""")

    # Stream values and overrides
    streams = param.List(default=[PointerXY])
    x = param.Number(default=0)
    y = param.Number(default=0)

    @property
    def mask(self):
        """
        An inspect_mask instance configured with the same pixels value.
        """
        return inspect_mask.instance(pixels=self.p.pixels)

    def _process(self, raster, key=None):
        """
        Reduce the raster in the box around the cursor and, unless that
        value indicates no hits or falls outside value_bounds, look up
        the samples of the underlying dataset inside that box. The full
        sorted result is stored on the hits parameter; the returned
        element holds at most max_indicators rows of the transformed
        result.
        """
        self._validate(raster)
        if isinstance(raster, RGB):
            raster = raster[..., raster.vdims[-1]]
        x_range, y_range = raster.range(0), raster.range(1)
        xdelta, ydelta = self._distance_args(raster, x_range, y_range, self.p.pixels)
        x, y = self.p.x, self.p.y
        val = raster[x-xdelta:x+xdelta, y-ydelta:y+ydelta].reduce(function=np.nansum)

        null_value, bounds = self.p.null_value, self.p.value_bounds
        # A NaN reduction always means "no hits". It is tested explicitly
        # because NaN never compares equal to itself, so substituting
        # null_value and comparing with == fails when null_value is NaN.
        no_hits = np.isnan(val) or val == null_value
        if not no_hits and bounds:
            no_hits = not (bounds[0] < val < bounds[1])

        if no_hits:
            result = self._empty_df(raster.dataset)
        else:
            masked = self._mask_dataframe(raster, x, y, xdelta, ydelta)
            result = self._sort_by_distance(raster, masked, x, y)

        self.hits = result
        df = self.p.transform(result)
        return self._element(raster, df.iloc[:self.p.max_indicators])

    @classmethod
    def _distance_args(cls, element, x_range, y_range, pixels):
        """
        Convert a size in pixels into x and y distances, using the
        gridded shape of the element and the supplied axis ranges.
        """
        ycount, xcount = element.interface.shape(element, gridded=True)
        x_delta = abs(x_range[1] - x_range[0]) / xcount
        y_delta = abs(y_range[1] - y_range[0]) / ycount
        return (x_delta*pixels, y_delta*pixels)

    @classmethod
    def _empty_df(cls, dataset):
        """
        Return a zero-row dataframe derived from the dataset's data.
        """
        if 'dask' in dataset.interface.datatype:
            # Use the dask metadata frame rather than the lazy data
            return dataset.data._meta.iloc[:0]
        elif dataset.interface.datatype in ['pandas', 'geopandas', 'spatialpandas']:
            return dataset.data.head(0)
        return dataset.iloc[:0].dframe()

    @classmethod
    def _mask_dataframe(cls, raster, x, y, xdelta, ydelta):
        """
        Mask the dataframe around the specified x and y position with
        the given x and y deltas
        """
        ds = raster.dataset
        x0, x1, y0, y1 = x-xdelta, x+xdelta, y-ydelta, y+ydelta
        if 'spatialpandas' in ds.interface.datatype:
            df = ds.data.cx[x0:x1, y0:y1]
            # Lazy results expose compute(); in-memory frames do not
            return df.compute() if hasattr(df, 'compute') else df
        xdim, ydim = raster.kdims
        query = {xdim.name: (x0, x1), ydim.name: (y0, y1)}
        return ds.select(**query).dframe()

    @classmethod
    def _validate(cls, raster):
        """
        Hook for subclasses to reject unsupported rasters; the base
        implementation accepts everything.
        """
        pass

    @classmethod
    def _vdims(cls, raster, df):
        """
        Return the columns of df that are not coordinate columns (the
        geometry column for spatialpandas data, otherwise the raster's
        key dimensions).
        """
        ds = raster.dataset
        if 'spatialpandas' in ds.interface.datatype:
            coords = [ds.interface.geo_column(ds.data)]
        else:
            coords = [kd.name for kd in raster.kdims]
        return [col for col in df.columns if col not in coords]

    @classmethod
    def _element(cls, raster, df):
        """
        Build the element returned by the operation from the hits
        dataframe. Must be implemented by subclasses.
        """
        raise NotImplementedError

    @classmethod
    def _sort_by_distance(cls, raster, df, x, y):
        """
        Return df ordered by distance from the (x, y) location. Must be
        implemented by subclasses.
        """
        raise NotImplementedError



class inspect_points(inspect_base):
    """
    Inspection operation that returns the hits near the cursor as a
    Points element.
    """

    @classmethod
    def _element(cls, raster, df):
        """
        Wrap the hits dataframe in a Points element using the raster's
        key dimensions; the remaining columns become value dimensions.
        """
        value_dims = cls._vdims(raster, df)
        return Points(df, kdims=raster.kdims, vdims=value_dims)

    @classmethod
    def _sort_by_distance(cls, raster, df, x, y):
        """
        Returns a dataframe of hits within a given mask around a given
        spatial location, sorted by distance from that location.
        """
        hits = raster.dataset.clone(df)
        xs, ys = [hits.dimension_values(kd) for kd in raster.kdims]
        x_off = xs - x
        y_off = ys - y
        # Squared distance is sufficient for ordering
        sq_dist = pd.Series(x_off*x_off + y_off*y_off)
        order = sq_dist.argsort().values
        return df.iloc[order]



class inspect_poly(inspect_base):
    """
    Inspection operation that returns the hits near the cursor as a
    Polygons element. Only spatialpandas-backed datasets are accepted.
    """

    @classmethod
    def _validate(cls, raster):
        """
        Raise a ValueError unless the raster's dataset uses a
        spatialpandas datatype.
        """
        if 'spatialpandas' not in raster.dataset.interface.datatype:
            raise ValueError("inspect_poly only supports spatialpandas datatypes.")

    @classmethod
    def _element(cls, raster, df):
        """
        Wrap the hits dataframe in a Polygons element using the raster's
        key dimensions, with the color_index option set to None.
        """
        return Polygons(df, kdims=raster.kdims, vdims=cls._vdims(raster, df)).opts(
            color_index=None)

    @classmethod
    def _sort_by_distance(cls, raster, df, x, y):
        """
        Returns a dataframe of hits within a given mask around a given
        spatial location, sorted by distance from that location.

        The distance is measured to the centre of each geometry's
        bounding box; geometries without coordinates are ordered last.
        """
        xs, ys = [], []
        for geom in df.geometry.array:
            # flat_values interleaves x and y coordinates
            gxs, gys = geom.flat_values[::2], geom.flat_values[1::2]
            if not len(gxs):
                xs.append(np.nan)
                ys.append(np.nan)
            else:
                xs.append((np.min(gxs)+np.max(gxs))/2)
                ys.append((np.min(gys)+np.max(gys))/2)
        dx, dy = np.array(xs) - x, np.array(ys) - y
        # np.argsort orders NaN distances last. Series.argsort marks NaN
        # positions with -1, which iloc would read as "the last row" and
        # so return a wrong, duplicated row for empty geometries.
        order = np.argsort(dx*dx + dy*dy)
        return df.iloc[order]

0 comments on commit 4cd9fb1

Please sign in to comment.