diff --git a/odo/backends/tests/test_json.py b/odo/backends/tests/test_json.py index 0affe565..14e7b0bc 100644 --- a/odo/backends/tests/test_json.py +++ b/odo/backends/tests/test_json.py @@ -1,17 +1,22 @@ from __future__ import absolute_import, division, print_function -from odo.backends.json import * -from odo.utils import tmpfile, ignoring -from odo import into -from odo.temp import Temp, _Temp -from contextlib import contextmanager -from datashape import dshape import datetime import os import gzip import os import json +from contextlib import contextmanager + +import numpy as np +from odo.backends.json import json_dumps +from odo.utils import tmpfile, ignoring +from odo import odo, discover, JSONLines, resource, JSON, convert, append, drop +from odo.temp import Temp, _Temp + +from datashape import dshape + + @contextmanager def json_file(data): with tmpfile('.json') as fn: @@ -20,6 +25,7 @@ def json_file(data): yield fn + @contextmanager def jsonlines_file(data): with tmpfile('.json') as fn: @@ -34,11 +40,13 @@ def jsonlines_file(data): dat = [{'name': 'Alice', 'amount': 100}, {'name': 'Bob', 'amount': 200}] + def test_discover_json(): with json_file(dat) as fn: j = JSON(fn) assert discover(j) == discover(dat) + def test_discover_jsonlines(): with jsonlines_file(dat) as fn: j = JSONLines(fn) @@ -239,3 +247,18 @@ def test_drop(): assert os.path.exists(fn) drop(js) assert not os.path.exists(fn) + + +def test_missing_to_csv(): + data = [dict(a=1, b=2), dict(a=2, c=4)] + with tmpfile('.json') as fn: + js = JSON(fn) + js = odo(data, js) + + with tmpfile('.csv') as csvf: + csv = odo(js, csvf) + with open(csv.path, 'rt') as f: + result = f.read() + + expected = 'a,b,c\n1,2.0,\n2,,4.0\n' + assert result == expected diff --git a/odo/convert.py b/odo/convert.py index be1b5afc..ff18c896 100644 --- a/odo/convert.py +++ b/odo/convert.py @@ -159,16 +159,15 @@ def element_of(seq): seq = seq[0] return seq + @convert.register(np.ndarray, list, cost=10.0) def list_to_numpy(seq, dshape=None, **kwargs): if isinstance(element_of(seq), dict): seq = list(records_to_tuples(dshape, seq)) - if (seq and isinstance(seq[0], Iterable) - and not ishashable(seq[0]) - and not isscalar(dshape)): + if (seq and isinstance(seq[0], Iterable) and not ishashable(seq[0]) and + not isscalar(dshape)): seq = list(map(tuple, seq)) - dtype = dshape_to_numpy(dshape) - return np.array(seq, dtype=dtype) + return np.array(seq, dtype=dshape_to_numpy(dshape)) @convert.register(Iterator, list, cost=0.001) diff --git a/odo/tests/test_convert.py b/odo/tests/test_convert.py index 789bab90..ad451258 100644 --- a/odo/tests/test_convert.py +++ b/odo/tests/test_convert.py @@ -127,6 +127,20 @@ def test_list_to_numpy_on_dicts(): assert convert(list, x) == [('Alice', 100), ('Bob', 200)] +def test_list_of_dicts_with_missing_to_numpy(): + data = [{'name': 'Alice', 'amount': 100}, + {'name': 'Bob'}, + {'amount': 200}] + result = convert(np.ndarray, data) + assert result.dtype.names == ('amount', 'name') + expected = np.array([(100.0, 'Alice'), + (np.nan, 'Bob'), + (200.0, None)], + dtype=[('amount', 'float64'), ('name', 'O')]) + assert np.all((result == expected) | + ((result != result) & (expected != expected))) + + def test_chunks_numpy_pandas(): x = np.array([('Alice', 100), ('Bob', 200)], dtype=[('name', 'S7'), ('amount', 'i4')]) diff --git a/odo/utils.py b/odo/utils.py index 612f25fb..b00e5b33 100644 --- a/odo/utils.py +++ b/odo/utils.py @@ -207,7 +207,7 @@ def records_to_tuples(ds, data): if isinstance(ds, (str, unicode)): ds = dshape(ds) if isinstance(ds.measure, Record) and len(ds.shape) == 1: - return pluck(ds.measure.names, data) + return pluck(ds.measure.names, data, default=None) if isinstance(ds.measure, Record) and len(ds.shape) == 0: return get(ds.measure.names, data) if not isinstance(ds.measure, Record):