Skip to content

Commit

Permalink
Added DataError for irrecoverable data interface initialization errors (#2041)
Browse files Browse the repository at this point in the history

* Added DataError for irrecoverable data interface init errors

* Added additional interface info to DataError

* Fixed DataArray vdim error

* Added DataError for missing xarray coords

* Fixed error for GriddedInterface shape mismatch

* Updated MultiInterface error tests
  • Loading branch information
philippjfr committed Oct 28, 2017
1 parent 2c81120 commit 2a0e807
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 30 deletions.
6 changes: 3 additions & 3 deletions holoviews/core/data/array.py
Expand Up @@ -5,7 +5,7 @@

import numpy as np

from .interface import Interface
from .interface import Interface, DataError
from ..dimension import Dimension
from ..element import Element
from ..ndmapping import NdMapping, item_check
Expand Down Expand Up @@ -75,8 +75,8 @@ def validate(cls, dataset):
ndims = len(dataset.dimensions())
ncols = dataset.data.shape[1] if dataset.data.ndim > 1 else 1
if ncols < ndims:
raise ValueError("Supplied data does not match specified "
"dimensions, expected at least %s columns." % ndims)
raise DataError("Supplied data does not match specified "
"dimensions, expected at least %s columns." % ndims, cls)


@classmethod
Expand Down
14 changes: 9 additions & 5 deletions holoviews/core/data/dictionary.py
Expand Up @@ -8,7 +8,7 @@

import numpy as np

from .interface import Interface
from .interface import Interface, DataError
from ..dimension import Dimension
from ..element import Element
from ..dimension import OrderedDict as cyODict
Expand Down Expand Up @@ -104,10 +104,14 @@ def validate(cls, dataset):
dimensions = dataset.dimensions(label='name')
not_found = [d for d in dimensions if d not in dataset.data]
if not_found:
raise ValueError('Following dimensions not found in data: %s' % not_found)
lengths = [len(dataset.data[dim]) for dim in dimensions if not np.isscalar(dataset.data[dim])]
if len({l for l in lengths if l > 1}) > 1:
raise ValueError('Length of columns do not match')
raise DataError('Following columns specified as dimensions '
'but not found in data: %s' % not_found, cls)
lengths = [(dim, 1 if np.isscalar(dataset.data[dim]) else len(dataset.data[dim]))
for dim in dimensions]
if len({l for d, l in lengths if l > 1}) > 1:
lengths = ', '.join(['%s: %d' % l for l in sorted(lengths)])
raise DataError('Length of columns must be equal or scalar, '
'columns have lengths: %s' % lengths, cls)


@classmethod
Expand Down
9 changes: 5 additions & 4 deletions holoviews/core/data/grid.py
Expand Up @@ -8,7 +8,7 @@
import numpy as np

from .dictionary import DictInterface
from .interface import Interface
from .interface import Interface, DataError
from ..dimension import Dimension
from ..element import Element
from ..dimension import OrderedDict as cyODict
Expand Down Expand Up @@ -74,10 +74,11 @@ def init(cls, eltype, data, kdims, vdims):
expected = tuple([len(data[kd]) for kd in kdim_names])
for vdim in vdim_names:
shape = data[vdim].shape
error = DataError if len(shape) > 1 else ValueError
if shape != expected[::-1] and not (not expected and shape == (1,)):
raise ValueError('Key dimension values and value array %s '
'shape do not match. Expected shape %s, '
'actual shape: %s' % (vdim, expected[::-1], shape))
raise error('Key dimension values and value array %s '
'shapes do not match. Expected shape %s, '
'actual shape: %s' % (vdim, expected[::-1], shape), cls)
return data, {'kdims':kdims, 'vdims':vdims}, {}


Expand Down
35 changes: 32 additions & 3 deletions holoviews/core/data/interface.py
Expand Up @@ -6,6 +6,15 @@
from .. import util


class DataError(ValueError):
    """
    DataError is raised when the data cannot be interpreted.

    The exception is a ValueError subclass, so existing handlers that
    catch ValueError will also catch it.

    msg:       The description of the problem.
    interface: Optional object exposing an ``error()`` callable; when
               supplied, the text it returns is appended to ``msg``
               after a blank line.
    """

    def __init__(self, msg, interface=None):
        if interface is None:
            # No interface supplied: forward the message untouched.
            full_msg = msg
        else:
            # Append the interface's own description of what it expects.
            full_msg = '\n\n'.join((msg, interface.error()))
        super(DataError, self).__init__(full_msg)


class iloc(object):
"""
iloc is small wrapper object that allows row, column based
Expand Down Expand Up @@ -121,6 +130,24 @@ def cast(cls, dataset, datatype=None, cast_type=None):
for co in dataset]


@classmethod
def error(cls):
    """
    Build a short help message describing the kind of data this
    interface class expects, ending with a link to the matching
    user guide page.

    The wording and the link are selected from the ``multi`` and
    ``gridded`` class attributes; ``multi`` takes precedence and
    ``gridded`` is only consulted when ``multi`` is falsy.

    Returns the formatted message string.
    """
    if cls.multi:
        datatype, guide = 'a list of tabular', 'Tabular'
    elif cls.gridded:
        datatype, guide = 'gridded', 'Gridded'
    else:
        datatype, guide = 'tabular', 'Tabular'

    url = "http://holoviews.org/user_guide/%s_Datasets.html" % guide
    template = ("{interface} expects {datatype} data, for more information "
                "on supported datatypes see {url}")
    return template.format(interface=cls.__name__, datatype=datatype, url=url)


@classmethod
def initialize(cls, eltype, data, kdims, vdims, datatype=None):
# Process params and dimensions
Expand Down Expand Up @@ -162,6 +189,8 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None):
try:
(data, dims, extra_kws) = interface.init(eltype, data, kdims, vdims)
break
except DataError:
raise
except Exception:
pass
else:
Expand All @@ -176,9 +205,9 @@ def validate(cls, dataset):
not_found = [d for d in dataset.dimensions(label='name')
if d not in dataset.data]
if not_found:
raise ValueError("Supplied data does not contain specified "
"dimensions, the following dimensions were "
"not found: %s" % repr(not_found))
raise DataError("Supplied data does not contain specified "
"dimensions, the following dimensions were "
"not found: %s" % repr(not_found), cls)


@classmethod
Expand Down
4 changes: 2 additions & 2 deletions holoviews/core/data/iris.py
Expand Up @@ -8,7 +8,7 @@

import numpy as np

from .interface import Interface
from .interface import Interface, DataError
from .grid import GridInterface
from ..dimension import Dimension
from ..element import Element
Expand Down Expand Up @@ -129,7 +129,7 @@ def init(cls, eltype, data, kdims, vdims):
@classmethod
def validate(cls, dataset):
if len(dataset.vdims) > 1:
raise ValueError("Iris cubes do not support more than one value dimension")
raise DataError("Iris cubes do not support more than one value dimension", cls)


@classmethod
Expand Down
9 changes: 5 additions & 4 deletions holoviews/core/data/multipath.py
@@ -1,7 +1,8 @@
import numpy as np

from ..util import max_range
from .interface import Interface
from .interface import Interface, DataError


class MultiInterface(Interface):
"""
Expand Down Expand Up @@ -40,11 +41,11 @@ def init(cls, eltype, data, kdims, vdims):
datatype=cls.subtypes)
if prev_interface:
if prev_interface != interface:
raise ValueError('MultiInterface subpaths must all have matching datatype.')
raise DataError('MultiInterface subpaths must all have matching datatype.', cls)
if dims['kdims'] != prev_dims['kdims']:
raise ValueError('MultiInterface subpaths must all have matching kdims.')
raise DataError('MultiInterface subpaths must all have matching kdims.', cls)
if dims['vdims'] != prev_dims['vdims']:
raise ValueError('MultiInterface subpaths must all have matching vdims.')
raise DataError('MultiInterface subpaths must all have matching vdims.', cls)
new_data.append(d)
prev_interface, prev_dims = interface, dims
return new_data, dims, {}
Expand Down
11 changes: 7 additions & 4 deletions holoviews/core/data/pandas.py
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import pandas as pd

from .interface import Interface
from .interface import Interface, DataError
from ..dimension import Dimension
from ..element import Element
from ..dimension import OrderedDict as cyODict
Expand Down Expand Up @@ -44,6 +44,9 @@ def init(cls, eltype, data, kdims, vdims):
elif kdims is None and vdims is None:
kdims = list(data.columns[:ndim])
vdims = [] if ndim is None else list(data.columns[ndim:])
if any(isinstance(d, (np.int64, int)) for d in kdims+vdims):
raise DataError("pandas DataFrame column names used as dimensions "
"must be strings not integers.", cls)
else:
# Check if data is of non-numeric type
# Then use defined data type
Expand Down Expand Up @@ -92,9 +95,9 @@ def validate(cls, dataset):
not_found = [d for d in dataset.dimensions(label='name')
if d not in dataset.data.columns]
if not_found:
raise ValueError("Supplied data does not contain specified "
"dimensions, the following dimensions were "
"not found: %s" % repr(not_found))
raise DataError("Supplied data does not contain specified "
"dimensions, the following dimensions were "
"not found: %s" % repr(not_found), cls)


@classmethod
Expand Down
10 changes: 9 additions & 1 deletion holoviews/core/data/xarray.py
Expand Up @@ -16,7 +16,7 @@
from ..ndmapping import NdMapping, item_check, sorted_context
from ..element import Element
from .grid import GridInterface
from .interface import Interface
from .interface import Interface, DataError


class XArrayInterface(GridInterface):
Expand Down Expand Up @@ -50,6 +50,9 @@ def init(cls, eltype, data, kdims, vdims):
vdim = vdims[0]
elif len(vdim_param.default) == 1:
vdim = vdim_param.default[0]
else:
raise DataError("If xarray DataArray does not define a name "
"an explicit vdim must be supplied.", cls)
vdims = [vdim]
if not kdims:
kdims = [Dimension(d) for d in data.dims[::-1]]
Expand Down Expand Up @@ -88,8 +91,13 @@ def init(cls, eltype, data, kdims, vdims):
kdims = [name for name in data.indexes.keys()
if isinstance(data[name].data, np.ndarray)]

not_found = [d for d in kdims if d.name not in data.coords]
if not isinstance(data, xr.Dataset):
raise TypeError('Data must be be an xarray Dataset type.')
elif not_found:
raise DataError("xarray Dataset must define coordinates "
"for all defined kdims, %s coordinates not found."
% not_found, cls)
return data, {'kdims': kdims, 'vdims': vdims}, {}


Expand Down
7 changes: 3 additions & 4 deletions tests/testmultiinterface.py
Expand Up @@ -6,6 +6,7 @@

import numpy as np
from holoviews import Dataset
from holoviews.core.data.interface import DataError
from holoviews.element import Path
from holoviews.element.comparison import ComparisonTestCase

Expand Down Expand Up @@ -112,15 +113,13 @@ def test_multi_array_redim(self):
def test_multi_mixed_interface_raises(self):
arrays = [np.random.rand(10, 2) if j else {'x': range(10), 'y': range(10)}
for i in range(2) for j in range(2)]
error = "None of the available storage backends were able to support the supplied data format."
with self.assertRaisesRegexp(ValueError, error):
with self.assertRaises(DataError):
mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular'])

def test_multi_mixed_dims_raises(self):
arrays = [{'x': range(10), 'y' if j else 'z': range(10)}
for i in range(2) for j in range(2)]
error = "Following dimensions not found in data: \['y'\]"
with self.assertRaisesRegexp(ValueError, error):
with self.assertRaises(DataError):
mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular'])

def test_multi_split(self):
Expand Down

0 comments on commit 2a0e807

Please sign in to comment.