Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added DataError for irrecoverable data interface initialization errors #2041

Merged
merged 6 commits into from
Oct 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions holoviews/core/data/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np

from .interface import Interface
from .interface import Interface, DataError
from ..dimension import Dimension
from ..element import Element
from ..ndmapping import NdMapping, item_check
Expand Down Expand Up @@ -75,8 +75,8 @@ def validate(cls, dataset):
ndims = len(dataset.dimensions())
ncols = dataset.data.shape[1] if dataset.data.ndim > 1 else 1
if ncols < ndims:
raise ValueError("Supplied data does not match specified "
"dimensions, expected at least %s columns." % ndims)
raise DataError("Supplied data does not match specified "
"dimensions, expected at least %s columns." % ndims, cls)


@classmethod
Expand Down
14 changes: 9 additions & 5 deletions holoviews/core/data/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np

from .interface import Interface
from .interface import Interface, DataError
from ..dimension import Dimension
from ..element import Element
from ..dimension import OrderedDict as cyODict
Expand Down Expand Up @@ -104,10 +104,14 @@ def validate(cls, dataset):
dimensions = dataset.dimensions(label='name')
not_found = [d for d in dimensions if d not in dataset.data]
if not_found:
raise ValueError('Following dimensions not found in data: %s' % not_found)
lengths = [len(dataset.data[dim]) for dim in dimensions if not np.isscalar(dataset.data[dim])]
if len({l for l in lengths if l > 1}) > 1:
raise ValueError('Length of columns do not match')
raise DataError('Following columns specified as dimensions '
'but not found in data: %s' % not_found, cls)
lengths = [(dim, 1 if np.isscalar(dataset.data[dim]) else len(dataset.data[dim]))
for dim in dimensions]
if len({l for d, l in lengths if l > 1}) > 1:
lengths = ', '.join(['%s: %d' % l for l in sorted(lengths)])
raise DataError('Length of columns must be equal or scalar, '
'columns have lengths: %s' % lengths, cls)


@classmethod
Expand Down
9 changes: 5 additions & 4 deletions holoviews/core/data/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np

from .dictionary import DictInterface
from .interface import Interface
from .interface import Interface, DataError
from ..dimension import Dimension
from ..element import Element
from ..dimension import OrderedDict as cyODict
Expand Down Expand Up @@ -74,10 +74,11 @@ def init(cls, eltype, data, kdims, vdims):
expected = tuple([len(data[kd]) for kd in kdim_names])
for vdim in vdim_names:
shape = data[vdim].shape
error = DataError if len(shape) > 1 else ValueError
if shape != expected[::-1] and not (not expected and shape == (1,)):
raise ValueError('Key dimension values and value array %s '
'shape do not match. Expected shape %s, '
'actual shape: %s' % (vdim, expected[::-1], shape))
raise error('Key dimension values and value array %s '
'shapes do not match. Expected shape %s, '
'actual shape: %s' % (vdim, expected[::-1], shape), cls)
return data, {'kdims':kdims, 'vdims':vdims}, {}


Expand Down
35 changes: 32 additions & 3 deletions holoviews/core/data/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@
from .. import util


class DataError(ValueError):
    """
    DataError is raised when the data cannot be interpreted.

    If an interface is supplied, the text returned by its ``error()``
    method is appended to the message, separated by a blank line.
    Because DataError subclasses ValueError, handlers that catch
    ValueError also catch it.
    """

    def __init__(self, msg, interface=None):
        if interface is not None:
            # Append the interface's own description of the data it expects
            msg = '\n\n'.join([msg, interface.error()])
        super(DataError, self).__init__(msg)


class iloc(object):
"""
iloc is small wrapper object that allows row, column based
Expand Down Expand Up @@ -121,6 +130,24 @@ def cast(cls, dataset, datatype=None, cast_type=None):
for co in dataset]


@classmethod
def error(cls):
    """
    Return a message naming the kind of data this interface expects
    together with a link to the matching user guide page.

    The wording depends on two class attributes: if ``cls.multi`` is
    truthy the interface is described as expecting a list of tabular
    data, otherwise ``cls.gridded`` selects between gridded and
    tabular data.
    """
    url = "http://holoviews.org/user_guide/%s_Datasets.html"
    if cls.multi:
        # Multi interfaces are pointed at the tabular guide
        datatype, guide = 'a list of tabular', 'Tabular'
    else:
        datatype = 'gridded' if cls.gridded else 'tabular'
        guide = datatype.capitalize()
    return ("{interface} expects {datatype} data, for more information "
            "on supported datatypes see {url}".format(
                interface=cls.__name__, datatype=datatype, url=url % guide))


@classmethod
def initialize(cls, eltype, data, kdims, vdims, datatype=None):
# Process params and dimensions
Expand Down Expand Up @@ -162,6 +189,8 @@ def initialize(cls, eltype, data, kdims, vdims, datatype=None):
try:
(data, dims, extra_kws) = interface.init(eltype, data, kdims, vdims)
break
except DataError:
raise
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't something similar needed in the try/except where all the data interfaces are tried, one by one?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused, that's exactly where this is.

except Exception:
pass
else:
Expand All @@ -176,9 +205,9 @@ def validate(cls, dataset):
not_found = [d for d in dataset.dimensions(label='name')
if d not in dataset.data]
if not_found:
raise ValueError("Supplied data does not contain specified "
"dimensions, the following dimensions were "
"not found: %s" % repr(not_found))
raise DataError("Supplied data does not contain specified "
"dimensions, the following dimensions were "
"not found: %s" % repr(not_found), cls)


@classmethod
Expand Down
4 changes: 2 additions & 2 deletions holoviews/core/data/iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np

from .interface import Interface
from .interface import Interface, DataError
from .grid import GridInterface
from ..dimension import Dimension
from ..element import Element
Expand Down Expand Up @@ -129,7 +129,7 @@ def init(cls, eltype, data, kdims, vdims):
@classmethod
def validate(cls, dataset):
if len(dataset.vdims) > 1:
raise ValueError("Iris cubes do not support more than one value dimension")
raise DataError("Iris cubes do not support more than one value dimension", cls)


@classmethod
Expand Down
9 changes: 5 additions & 4 deletions holoviews/core/data/multipath.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import numpy as np

from ..util import max_range
from .interface import Interface
from .interface import Interface, DataError


class MultiInterface(Interface):
"""
Expand Down Expand Up @@ -40,11 +41,11 @@ def init(cls, eltype, data, kdims, vdims):
datatype=cls.subtypes)
if prev_interface:
if prev_interface != interface:
raise ValueError('MultiInterface subpaths must all have matching datatype.')
raise DataError('MultiInterface subpaths must all have matching datatype.', cls)
if dims['kdims'] != prev_dims['kdims']:
raise ValueError('MultiInterface subpaths must all have matching kdims.')
raise DataError('MultiInterface subpaths must all have matching kdims.', cls)
if dims['vdims'] != prev_dims['vdims']:
raise ValueError('MultiInterface subpaths must all have matching vdims.')
raise DataError('MultiInterface subpaths must all have matching vdims.', cls)
new_data.append(d)
prev_interface, prev_dims = interface, dims
return new_data, dims, {}
Expand Down
11 changes: 7 additions & 4 deletions holoviews/core/data/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import pandas as pd

from .interface import Interface
from .interface import Interface, DataError
from ..dimension import Dimension
from ..element import Element
from ..dimension import OrderedDict as cyODict
Expand Down Expand Up @@ -44,6 +44,9 @@ def init(cls, eltype, data, kdims, vdims):
elif kdims is None and vdims is None:
kdims = list(data.columns[:ndim])
vdims = [] if ndim is None else list(data.columns[ndim:])
if any(isinstance(d, (np.int64, int)) for d in kdims+vdims):
raise DataError("pandas DataFrame column names used as dimensions "
"must be strings not integers.", cls)
else:
# Check if data is of non-numeric type
# Then use defined data type
Expand Down Expand Up @@ -92,9 +95,9 @@ def validate(cls, dataset):
not_found = [d for d in dataset.dimensions(label='name')
if d not in dataset.data.columns]
if not_found:
raise ValueError("Supplied data does not contain specified "
"dimensions, the following dimensions were "
"not found: %s" % repr(not_found))
raise DataError("Supplied data does not contain specified "
"dimensions, the following dimensions were "
"not found: %s" % repr(not_found), cls)


@classmethod
Expand Down
10 changes: 9 additions & 1 deletion holoviews/core/data/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from ..ndmapping import NdMapping, item_check, sorted_context
from ..element import Element
from .grid import GridInterface
from .interface import Interface
from .interface import Interface, DataError


class XArrayInterface(GridInterface):
Expand Down Expand Up @@ -50,6 +50,9 @@ def init(cls, eltype, data, kdims, vdims):
vdim = vdims[0]
elif len(vdim_param.default) == 1:
vdim = vdim_param.default[0]
else:
raise DataError("If xarray DataArray does not define a name "
"an explicit vdim must be supplied.", cls)
vdims = [vdim]
if not kdims:
kdims = [Dimension(d) for d in data.dims[::-1]]
Expand Down Expand Up @@ -88,8 +91,13 @@ def init(cls, eltype, data, kdims, vdims):
kdims = [name for name in data.indexes.keys()
if isinstance(data[name].data, np.ndarray)]

not_found = [d for d in kdims if d.name not in data.coords]
if not isinstance(data, xr.Dataset):
raise TypeError('Data must be be an xarray Dataset type.')
elif not_found:
raise DataError("xarray Dataset must define coordinates "
"for all defined kdims, %s coordinates not found."
% not_found, cls)
return data, {'kdims': kdims, 'vdims': vdims}, {}


Expand Down
7 changes: 3 additions & 4 deletions tests/testmultiinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import numpy as np
from holoviews import Dataset
from holoviews.core.data.interface import DataError
from holoviews.element import Path
from holoviews.element.comparison import ComparisonTestCase

Expand Down Expand Up @@ -112,15 +113,13 @@ def test_multi_array_redim(self):
def test_multi_mixed_interface_raises(self):
arrays = [np.random.rand(10, 2) if j else {'x': range(10), 'y': range(10)}
for i in range(2) for j in range(2)]
error = "None of the available storage backends were able to support the supplied data format."
with self.assertRaisesRegexp(ValueError, error):
with self.assertRaises(DataError):
mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular'])

def test_multi_mixed_dims_raises(self):
arrays = [{'x': range(10), 'y' if j else 'z': range(10)}
for i in range(2) for j in range(2)]
error = "Following dimensions not found in data: \['y'\]"
with self.assertRaisesRegexp(ValueError, error):
with self.assertRaises(DataError):
mds = Path(arrays, kdims=['x', 'y'], datatype=['multitabular'])

def test_multi_split(self):
Expand Down