From 050c4c77fc0e231304f2604d6108173acde01a61 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 9 Jan 2017 18:08:39 +0000 Subject: [PATCH] Split categorical_aggregate2d into a few methods --- holoviews/element/util.py | 70 ++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/holoviews/element/util.py b/holoviews/element/util.py index f1c9b03d01..d395f6255c 100644 --- a/holoviews/element/util.py +++ b/holoviews/element/util.py @@ -80,29 +80,14 @@ class categorical_aggregate2d(ElementOperation): datatype = param.List(['xarray', 'grid'] if xr else ['grid'], doc=""" The grid interface types to use when constructing the gridded Dataset.""") - def _process(self, obj, key=None): + def _get_coords(self, obj): """ - Generates a categorical 2D aggregate by inserting NaNs at all - cross-product locations that do not already have a value assigned. - Returns a 2D gridded Dataset object. + Get the coordinates of the 2D aggregate, maintaining the correct + sorting order. """ - if isinstance(obj, Dataset) and obj.interface.gridded: - return obj - elif obj.ndims > 2: - raise ValueError("Cannot aggregate more than two dimensions") - elif len(obj.dimensions()) < 3: - raise ValueError("Must have at two dimensions to aggregate over" - "and one value dimension to aggregate on.") - - dim_labels = obj.dimensions(label=True) - dims = obj.dimensions() - kdims, vdims = dims[:2], dims[2:] - xdim, ydim = dim_labels[:2] - nvdims = len(dims) - 2 - d1keys = obj.dimension_values(xdim, False) - d2keys = obj.dimension_values(ydim, False) - shape = (len(d2keys), len(d1keys)) - nsamples = np.product(shape) + xdim, ydim = obj.dimensions(label=True)[:2] + xcoords = obj.dimension_values(xdim, False) + ycoords = obj.dimension_values(ydim, False) # Determine global orderings of y-values using topological sort grouped = obj.groupby(xdim, container_type=OrderedDict, @@ -116,13 +101,25 @@ def _process(self, obj, key=None): for i in range(len(vals)-1): p1, p2 = vals[i:i+2] orderings[p1] = [p2] - if one_to_one(orderings, d2keys): - d2keys = np.sort(d2keys) + if one_to_one(orderings, ycoords): + ycoords = np.sort(ycoords) elif not is_cyclic(orderings): - d2keys = list(itertools.chain(*sort_topologically(orderings))) + ycoords = list(itertools.chain(*sort_topologically(orderings))) + return xcoords, ycoords - # Pad data with NaNs - ys, xs = cartesian_product([d2keys, d1keys]) + + def _aggregate_dataset(self, obj, xcoords, ycoords): + """ + Generates a gridded Dataset from a column-based dataset and + lists of xcoords and ycoords + """ + dim_labels = obj.dimensions(label=True) + vdims = obj.dimensions()[2:] + xdim, ydim = dim_labels[:2] + shape = (len(ycoords), len(xcoords)) + nsamples = np.product(shape) + + ys, xs = cartesian_product([ycoords, xcoords]) data = {xdim: xs.flatten(), ydim: ys.flatten()} for vdim in vdims: values = np.empty(nsamples) @@ -134,8 +131,27 @@ def _process(self, obj, key=None): agg = concat_data.reindex([xdim, ydim]).aggregate([xdim, ydim], reduce_fn) # Convert data to a gridded dataset - grid_data = {xdim: d1keys, ydim: d2keys} + grid_data = {xdim: xcoords, ydim: ycoords} for vdim in vdims: grid_data[vdim.name] = agg.dimension_values(vdim).reshape(shape) return agg.clone(grid_data, datatype=self.p.datatype) + + def _process(self, obj, key=None): + """ + Generates a categorical 2D aggregate by inserting NaNs at all + cross-product locations that do not already have a value assigned. + Returns a 2D gridded Dataset object. + """ + if isinstance(obj, Dataset) and obj.interface.gridded: + return obj + elif obj.ndims > 2: + raise ValueError("Cannot aggregate more than two dimensions") + elif len(obj.dimensions()) < 3: + raise ValueError("Must have at two dimensions to aggregate over" + "and one value dimension to aggregate on.") + + if not isinstance(obj, Dataset): + obj = Dataset(obj) + xcoords, ycoords = self._get_coords(obj) + return self._aggregate_dataset(obj, xcoords, ycoords)