Skip to content

Commit

Permalink
Split categorical_aggregate2d into a few methods
Browse files Browse the repository at this point in the history
  • Loading branch information
philippjfr committed Jan 9, 2017
1 parent f5998f2 commit 050c4c7
Showing 1 changed file with 43 additions and 27 deletions.
70 changes: 43 additions & 27 deletions holoviews/element/util.py
Expand Up @@ -80,29 +80,14 @@ class categorical_aggregate2d(ElementOperation):
datatype = param.List(['xarray', 'grid'] if xr else ['grid'], doc="""
The grid interface types to use when constructing the gridded Dataset.""")

def _process(self, obj, key=None):
def _get_coords(self, obj):
"""
Generates a categorical 2D aggregate by inserting NaNs at all
cross-product locations that do not already have a value assigned.
Returns a 2D gridded Dataset object.
Get the coordinates of the 2D aggregate, maintaining the correct
sorting order.
"""
if isinstance(obj, Dataset) and obj.interface.gridded:
return obj
elif obj.ndims > 2:
raise ValueError("Cannot aggregate more than two dimensions")
elif len(obj.dimensions()) < 3:
raise ValueError("Must have at two dimensions to aggregate over"
"and one value dimension to aggregate on.")

dim_labels = obj.dimensions(label=True)
dims = obj.dimensions()
kdims, vdims = dims[:2], dims[2:]
xdim, ydim = dim_labels[:2]
nvdims = len(dims) - 2
d1keys = obj.dimension_values(xdim, False)
d2keys = obj.dimension_values(ydim, False)
shape = (len(d2keys), len(d1keys))
nsamples = np.product(shape)
xdim, ydim = obj.dimensions(label=True)[:2]
xcoords = obj.dimension_values(xdim, False)
ycoords = obj.dimension_values(ydim, False)

# Determine global orderings of y-values using topological sort
grouped = obj.groupby(xdim, container_type=OrderedDict,
Expand All @@ -116,13 +101,25 @@ def _process(self, obj, key=None):
for i in range(len(vals)-1):
p1, p2 = vals[i:i+2]
orderings[p1] = [p2]
if one_to_one(orderings, d2keys):
d2keys = np.sort(d2keys)
if one_to_one(orderings, ycoords):
ycoords = np.sort(ycoords)
elif not is_cyclic(orderings):
d2keys = list(itertools.chain(*sort_topologically(orderings)))
ycoords = list(itertools.chain(*sort_topologically(orderings)))
return xcoords, ycoords

# Pad data with NaNs
ys, xs = cartesian_product([d2keys, d1keys])

def _aggregate_dataset(self, obj, xcoords, ycoords):
"""
Generates a gridded Dataset from a column-based dataset and
lists of xcoords and ycoords
"""
dim_labels = obj.dimensions(label=True)
vdims = obj.dimensions()[2:]
xdim, ydim = dim_labels[:2]
shape = (len(ycoords), len(xcoords))
nsamples = np.product(shape)

ys, xs = cartesian_product([ycoords, xcoords])
data = {xdim: xs.flatten(), ydim: ys.flatten()}
for vdim in vdims:
values = np.empty(nsamples)
Expand All @@ -134,8 +131,27 @@ def _process(self, obj, key=None):
agg = concat_data.reindex([xdim, ydim]).aggregate([xdim, ydim], reduce_fn)

# Convert data to a gridded dataset
grid_data = {xdim: d1keys, ydim: d2keys}
grid_data = {xdim: xcoords, ydim: ycoords}
for vdim in vdims:
grid_data[vdim.name] = agg.dimension_values(vdim).reshape(shape)
return agg.clone(grid_data, datatype=self.p.datatype)


def _process(self, obj, key=None):
    """
    Generates a categorical 2D aggregate by inserting NaNs at all
    cross-product locations that do not already have a value assigned.
    Returns a 2D gridded Dataset object.

    An ``obj`` that is already a gridded Dataset is returned unchanged.
    Any other input is wrapped in a Dataset (if it is not one already),
    its coordinates are computed by ``_get_coords`` and the gridded
    result is built by ``_aggregate_dataset``. The ``key`` argument is
    accepted but not used here.

    Raises a ValueError if ``obj.ndims`` is greater than two or if
    ``obj`` has fewer than three dimensions in total.
    """
    if isinstance(obj, Dataset) and obj.interface.gridded:
        return obj
    elif obj.ndims > 2:
        raise ValueError("Cannot aggregate more than two dimensions")
    elif len(obj.dimensions()) < 3:
        # Adjacent string literals are joined verbatim, so the first
        # fragment has to end with a space to keep the words apart.
        raise ValueError("Must have at least two dimensions to aggregate over "
                         "and one value dimension to aggregate on.")

    # The helper methods are handed a Dataset, so wrap other element types.
    if not isinstance(obj, Dataset):
        obj = Dataset(obj)
    xcoords, ycoords = self._get_coords(obj)
    return self._aggregate_dataset(obj, xcoords, ycoords)

0 comments on commit 050c4c7

Please sign in to comment.