
Commit

Merge fd8100a into b39a3c3
philippjfr committed Sep 23, 2019
2 parents b39a3c3 + fd8100a commit ba37181
Showing 6 changed files with 203 additions and 46 deletions.
90 changes: 77 additions & 13 deletions holoviews/core/data/grid.py
@@ -57,8 +57,15 @@ def init(cls, eltype, data, kdims, vdims):

ndims = len(kdims)
dimensions = [dimension_name(d) for d in kdims+vdims]
vdim_tuple = tuple(dimension_name(vd) for vd in vdims)
if isinstance(data, tuple):
data = {d: v for d, v in zip(dimensions, data)}
if (len(data) != len(dimensions) and len(data) == (ndims+1) and
len(data[-1].shape) == (ndims+1)):
value_array = data[-1]
data = {d: v for d, v in zip(dimensions, data[:-1])}
data[vdim_tuple] = value_array
else:
data = {d: v for d, v in zip(dimensions, data)}
elif isinstance(data, list) and data == []:
data = OrderedDict([(d, []) for d in dimensions])
elif not any(isinstance(data, tuple(t for t in interface.types if t is not None))
@@ -78,22 +85,37 @@ def init(cls, eltype, data, kdims, vdims):
raise TypeError('GridInterface must be instantiated as a '
'dictionary or tuple')

for dim in kdims+vdims:
validate_dims = list(kdims)
if vdim_tuple in data:
if not isinstance(data[vdim_tuple], get_array_types()):
data[vdim_tuple] = np.array(data[vdim_tuple])
else:
validate_dims += vdims

for dim in validate_dims:
name = dimension_name(dim)
if name not in data:
raise ValueError("Values for dimension %s not found" % dim)
if not isinstance(data[name], get_array_types()):
data[name] = np.array(data[name])

kdim_names = [dimension_name(d) for d in kdims]
vdim_names = [dimension_name(d) for d in vdims]
if vdim_tuple in data:
vdim_names = [vdim_tuple]
else:
vdim_names = [dimension_name(d) for d in vdims]

expected = tuple([len(data[kd]) for kd in kdim_names])
irregular_shape = data[kdim_names[0]].shape if kdim_names else ()
valid_shape = irregular_shape if len(irregular_shape) > 1 else expected[::-1]
shapes = tuple([data[kd].shape for kd in kdim_names])
for vdim in vdim_names:
shape = data[vdim].shape
error = DataError if len(shape) > 1 else ValueError
if vdim_tuple in data:
if shape[-1] != len(vdims):
raise error('The shape of the value array does not match the number of value dimensions.')
shape = shape[:-1]
if (not expected and shape == (1,)) or (len(set((shape,)+shapes)) == 1 and len(shape) > 1):
# If empty or an irregular mesh
pass
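
Note: the tuple branch above now accepts a trailing packed value array whose last axis enumerates the value dimensions. A minimal usage sketch, not part of this diff (the dimension names and datatype=['grid'] are illustrative):

import numpy as np
import holoviews as hv

xs = np.arange(3)  # 'x' coordinates
ys = np.arange(2)  # 'y' coordinates

# Packed value array: the trailing axis indexes the vdims ('u', 'v'), so the
# shape is (len(ys), len(xs), 2) rather than two separate (2, 3) arrays.
packed = np.random.rand(2, 3, 2)

ds = hv.Dataset((xs, ys, packed), kdims=['x', 'y'], vdims=['u', 'v'],
                datatype=['grid'])

Internally the packed array is stored under the tuple key ('u', 'v') instead of being split into one array per vdim.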
@@ -154,7 +176,13 @@ def isscalar(cls, dataset, dim):

@classmethod
def validate(cls, dataset, vdims=True):
Interface.validate(dataset, vdims)
dims = 'all' if vdims else 'key'
not_found = [d for d in dataset.dimensions(dims, label='name')
if d not in dataset.data]
if not_found and tuple(not_found) not in dataset.data:
raise DataError("Supplied data does not contain specified "
"dimensions, the following dimensions were "
"not found: %s" % repr(not_found), cls)


@classmethod
@@ -166,9 +194,33 @@ def dimension_type(cls, dataset, dim):
return arr.dtype.type


@classmethod
def packed(cls, dataset):
vdim_tuple = tuple(vd.name for vd in dataset.vdims)
return vdim_tuple if vdim_tuple in dataset.data else False

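Note: continuing the hypothetical ds from the sketch above, packed storage can be inspected as follows (ds.interface resolves to GridInterface for grid-backed data):

key = ds.interface.packed(ds)  # ('u', 'v') when packed, False otherwise
if key:
    arr = ds.data[key]         # the shared (2, 3, 2) value array
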

@classmethod
def dtype(cls, dataset, dimension):
name = dataset.get_dimension(dimension, strict=True).name
vdim_tuple = cls.packed(dataset)
if vdim_tuple and name in vdim_tuple:
data = dataset.data[vdim_tuple][..., vdim_tuple.index(name)]
else:
data = dataset.data[name]
if util.isscalar(data):
return np.array([data]).dtype
else:
return data.dtype

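Note: because all vdims share one array, every vdim necessarily reports the packed array's dtype; a quick check under the same assumptions as above:

assert ds.interface.dtype(ds, 'u') == ds.data[('u', 'v')].dtype
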

@classmethod
def shape(cls, dataset, gridded=False):
shape = dataset.data[dataset.vdims[0].name].shape
vdim_tuple = cls.packed(dataset)
if vdim_tuple:
shape = dataset.data[vdim_tuple].shape[:-1]
else:
shape = dataset.data[dataset.vdims[0].name].shape
if gridded:
return shape
else:
@@ -343,7 +395,11 @@ def values(
):
dim = dataset.get_dimension(dim, strict=True)
if dim in dataset.vdims or dataset.data[dim.name].ndim > 1:
data = dataset.data[dim.name]
vdim_tuple = cls.packed(dataset)
if vdim_tuple:
data = dataset.data[vdim_tuple][..., dataset.vdims.index(dim)]
else:
data = dataset.data[dim.name]
data = cls.canonicalize(dataset, data)
da = dask_array_module()
if compute and da and isinstance(data, da.Array):
@@ -582,13 +638,21 @@ def aggregate(cls, dataset, kdims, function, **kwargs):
for kdim in dataset.kdims if kdim not in kdims)
da = dask_array_module()
dropped = []
for vdim in dataset.vdims:
values = dataset.data[vdim.name]
atleast_1d = da.atleast_1d if is_dask(values) else np.atleast_1d
try:
data[vdim.name] = atleast_1d(function(values, axis=axes, **kwargs))
except TypeError:
dropped.append(vdim)
vdim_tuple = cls.packed(dataset)
if vdim_tuple:
values = dataset.data[vdim_tuple]
if axes:
data[vdim_tuple] = function(values, axis=axes, **kwargs)
else:
data[vdim_tuple] = values
else:
for vdim in dataset.vdims:
values = dataset.data[vdim.name]
atleast_1d = da.atleast_1d if is_dask(values) else np.atleast_1d
try:
data[vdim.name] = atleast_1d(function(values, axis=axes, **kwargs))
except TypeError:
dropped.append(vdim)
return data, dropped
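
Note: with packed values the aggregation function runs once over the shared array instead of once per vdim. A sketch using the hypothetical ds from earlier:

# Reducing over 'x' collapses the packed (2, 3, 2) array to (2, 2) in one call.
reduced = ds.reduce(['x'], np.mean)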


107 changes: 80 additions & 27 deletions holoviews/core/data/xarray.py
@@ -33,18 +32,32 @@ def applies(cls, obj):
@classmethod
def dimension_type(cls, dataset, dim):
name = dataset.get_dimension(dim, strict=True).name
if cls.packed(dataset) and name in dataset.vdims:
return dataset.data.dtype.type
return dataset.data[name].dtype.type


@classmethod
def dtype(cls, dataset, dim):
name = dataset.get_dimension(dim, strict=True).name
if cls.packed(dataset) and name in dataset.vdims:
return dataset.data.dtype
return dataset.data[name].dtype

@classmethod
def packed(cls, dataset):
import xarray as xr
return isinstance(dataset.data, xr.DataArray)

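Note: for the xarray interface, packed simply means the data is a bare DataArray rather than a Dataset. A hypothetical DataArray that the init logic below keeps packed (all names are illustrative):

import numpy as np
import xarray as xr
import holoviews as hv

# One more dimension than the kdims, with the trailing axis length matching
# the number of vdims, so the DataArray is not unpacked into a Dataset.
da = xr.DataArray(np.random.rand(2, 3, 2), dims=['y', 'x', 'band'],
                  coords={'y': [0, 1], 'x': [0, 1, 2], 'band': [0, 1]})
ds = hv.Dataset(da, kdims=['x', 'y'], vdims=['u', 'v'])
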
@classmethod
def shape(cls, dataset, gridded=False):
array = dataset.data[dataset.vdims[0].name]
if cls.packed(dataset):
shape = dataset.data.shape[:-1]
if gridded:
return shape
else:
return (np.product(shape, dtype=np.intp), len(dataset.dimensions()))
else:
array = dataset.data[dataset.vdims[0].name]
if not any(cls.irregular(dataset, kd) for kd in dataset.kdims):
names = [kd.name for kd in dataset.kdims
if kd.name in array.dims][::-1]
@@ -81,8 +95,13 @@ def retrieve_unit_and_label(dim):
spec = (dim.name, dim.label)
return dim.clone(spec, unit=unit)

packed = False
if isinstance(data, xr.DataArray):
if vdims:
kdim_len = len(kdim_param.default) if kdims is None else len(kdims)
vdim_len = len(vdim_param.default) if vdims is None else len(vdims)
if kdim_len == len(data.dims)-1 and data.shape[-1] == vdim_len:
packed = True
elif vdims:
vdim = vdims[0]
elif data.name:
vdim = Dimension(data.name)
@@ -104,10 +123,11 @@ def retrieve_unit_and_label(dim):
"dimension. Give the DataArray a name or "
"supply an explicit vdim." % eltype.__name__,
cls)
vdims = [vdim]
data = data.to_dataset(name=vdim.name)
if not packed:
vdims = [vdim]
data = data.to_dataset(name=vdim.name)

if not isinstance(data, xr.Dataset):
if not isinstance(data, (xr.Dataset, xr.DataArray)):
if kdims is None:
kdims = kdim_param.default
if vdims is None:
@@ -116,10 +136,18 @@ def retrieve_unit_and_label(dim):
vdims = [asdim(vd) for vd in vdims]
if isinstance(data, np.ndarray) and data.ndim == 2 and data.shape[1] == len(kdims+vdims):
data = tuple(data)

ndims = len(kdims)
if isinstance(data, tuple):
data = {d.name: vals for d, vals in zip(kdims + vdims, data)}
dimensions = [d.name for d in kdims+vdims]
if (len(data) != len(dimensions) and len(data) == (ndims+1) and
len(data[-1].shape) == (ndims+1)):
value_array = data[-1]
data = {d: v for d, v in zip(dimensions, data[:-1])}
packed = True
else:
data = {d: v for d, v in zip(dimensions, data)}
elif isinstance(data, list) and data == []:
ndims = len(kdims)
dimensions = [d.name for d in kdims + vdims]
data = {d: np.array([]) for d in dimensions[:ndims]}
data.update({d: np.empty((0,) * ndims) for d in dimensions[ndims:]})
@@ -138,13 +166,19 @@ def retrieve_unit_and_label(dim):
coord = coord_vals
coords[kd.name] = coord
xr_kwargs = {'dims': dims if max(coord_dims) > 1 else list(coords)[::-1]}
arrays = {}
for vdim in vdims:
arr = data[vdim.name]
if not isinstance(arr, xr.DataArray):
arr = xr.DataArray(arr, coords=coords, **xr_kwargs)
arrays[vdim.name] = arr
data = xr.Dataset(arrays)
if packed:
xr_kwargs['dims'] = list(coords)[::-1] + ['band']
coords['band'] = list(range(len(vdims)))
data = xr.DataArray(value_array, coords=coords, **xr_kwargs)
else:
arrays = {}
for vdim in vdims:
arr = data[vdim.name]
if not isinstance(arr, xr.DataArray):
arr = xr.DataArray(arr, coords=coords, **xr_kwargs)
arrays[vdim.name] = arr
data = xr.Dataset(arrays)
else:
if not data.coords:
data = data.assign_coords(**{k: range(v) for k, v in data.dims.items()})
@@ -156,7 +190,9 @@ def retrieve_unit_and_label(dim):
kdims = [name for name in data.indexes.keys()
if isinstance(data[name].data, np.ndarray)]
kdims = sorted(kdims, key=lambda x: (xrcoords.index(x) if x in xrcoords else float('inf'), x))
if set(xrdims) != set(kdims):
if packed:
kdims = kdims[:-1]
elif set(xrdims) != set(kdims):
virtual_dims = [xd for xd in xrdims if xd not in kdims]
for c in data.coords:
if c not in kdims and set(data[c].dims) == set(virtual_dims):
Expand All @@ -169,7 +205,7 @@ def retrieve_unit_and_label(dim):
if not any(d.name == k or (isinstance(v, xr.DataArray) and d.name in v.dims)
for k, v in data.coords.items()):
not_found.append(d)
if not isinstance(data, xr.Dataset):
if not isinstance(data, (xr.Dataset, xr.DataArray)):
raise TypeError('Data must be an xarray Dataset or DataArray.')
elif not_found:
raise DataError("xarray Dataset must define coordinates "
@@ -181,7 +217,9 @@ def retrieve_unit_and_label(dim):

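Note: the tuple branch earlier in init mirrors the grid interface: a trailing value array with one extra axis is wrapped as a single DataArray with a synthetic 'band' coordinate. A sketch of equivalent input, with the same hypothetical names as before:

xs, ys = np.arange(3), np.arange(2)
packed = np.random.rand(2, 3, 2)
ds = hv.Dataset((xs, ys, packed), kdims=['x', 'y'], vdims=['u', 'v'],
                datatype=['xarray'])
# ds.data is now an xr.DataArray with dims ('y', 'x', 'band')
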
@classmethod
def validate(cls, dataset, vdims=True):
Interface.validate(dataset, vdims)
import xarray as xr
if isinstance(dataset.data, xr.Dataset):
Interface.validate(dataset, vdims)
# Check whether irregular (i.e. multi-dimensional) coordinate
# array dimensionality matches
irregular = []
@@ -210,7 +248,10 @@ def range(cls, dataset, dimension):
else:
dmin, dmax = np.nanmin(data), np.nanmax(data)
else:
data = dataset.data[dim]
if cls.packed(dataset) and dim in dataset.vdims:
data = dataset.data.values[..., dataset.vdims.index(dim)]
else:
data = dataset.data[dim]
if len(data):
dmin, dmax = data.min().data, data.max().data
else:
@@ -248,9 +289,6 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):

drop_dim = any(d not in group_kwargs['kdims'] for d in element_dims)

# XArray 0.7.2 does not support multi-dimensional groupby
# Replace custom implementation when
# https://github.com/pydata/xarray/pull/818 is merged.
group_by = [d.name for d in index_dims]
data = []
if len(dimensions) == 1:
@@ -313,15 +351,22 @@ def coords(cls, dataset, dimension, ordered=False, expanded=False, edges=False):
@classmethod
def values(cls, dataset, dim, expanded=True, flat=True, compute=True, keep_index=False):
dim = dataset.get_dimension(dim, strict=True)
data = dataset.data[dim.name].data
packed = cls.packed(dataset) and dim in dataset.vdims
if packed:
data = dataset.data.data[..., dataset.vdims.index(dim)]
else:
data = dataset.data[dim.name].data
irregular = cls.irregular(dataset, dim) if dim in dataset.kdims else False
irregular_kdims = [d for d in dataset.kdims if cls.irregular(dataset, d)]
if irregular_kdims:
virtual_coords = list(dataset.data[irregular_kdims[0].name].coords.dims)
else:
virtual_coords = []
if dim in dataset.vdims or irregular:
data_coords = list(dataset.data[dim.name].dims)
if packed:
data_coords = list(dataset.data.dims)[:-1]
else:
data_coords = list(dataset.data[dim.name].dims)
da = dask_array_module()
if compute and da and isinstance(data, da.Array):
data = data.compute()
@@ -347,7 +392,7 @@ def unpack_scalar(cls, dataset, data):
Given a dataset object and data in the appropriate format for
the interface, return a simple scalar.
"""
if (len(data.data_vars) == 1 and
if (not cls.packed(dataset) and len(data.data_vars) == 1 and
len(data[dataset.vdims[0].name].shape) == 0):
return data[dataset.vdims[0].name].item()
return data
@@ -396,6 +441,11 @@ def ndloc(cls, dataset, indices):
for ind in adjusted_indices) and len(indices) == len(kdims))
if sampled or (all_scalar and len(indices) == len(kdims)):
import xarray as xr
if cls.packed(dataset):
selected = dataset.data.isel({k: xr.DataArray(v) for k, v in isel.items()})
df = selected.to_dataframe('vdims')[['vdims']].T
vdims = [vd.name for vd in dataset.vdims]
return df.rename(columns={i: d for i, d in enumerate(vdims)})[vdims]
if all_scalar: isel = {k: [v] for k, v in isel.items()}
selected = dataset.data.isel({k: xr.DataArray(v) for k, v in isel.items()})
return selected.to_dataframe().reset_index()
@@ -422,8 +472,11 @@ def reindex(cls, dataset, kdims=None, vdims=None):
if len(vals) == 1:
constant[kd.name] = vals[0]
if len(constant) == len(dropped_kdims):
return dataset.data.sel(**{k: v for k, v in constant.items()
if k in dataset.data.dims})
dropped = dataset.data.sel(**{k: v for k, v in constant.items()
if k in dataset.data.dims})
if vdims and cls.packed(dataset):
return dropped.isel(**{dataset.data.dims[-1]: [dataset.vdims.index(vd) for vd in vdims]})
return dropped
elif dropped_kdims:
return tuple(dataset.columns(kdims+vdims).values())
return dataset.data
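
Note: for packed data, a reindex that drops a constant key dimension now also selects the requested vdims along the trailing dimension via isel. A hedged sketch under the same hypothetical names (assuming the earlier imports):

# 'y' has a single value, so it can be dropped; keeping only 'v' isels band 1.
ds1 = hv.Dataset((np.arange(3), [0], np.random.rand(1, 3, 2)),
                 kdims=['x', 'y'], vdims=['u', 'v'], datatype=['xarray'])
sub = ds1.reindex(kdims=['x'], vdims=['v'])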
