Skip to content

Commit

Permalink
Merge pull request #228 from cpcloud/missing-json-to-csv
Browse files (browse the repository at this point in the history)
Add a default of None when going from records to tuples
  • Loading branch information
cpcloud committed Jun 20, 2015
2 parents b3dec1a + bcbbb1a commit 2b56485
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 12 deletions.
35 changes: 29 additions & 6 deletions odo/backends/tests/test_json.py
@@ -1,17 +1,22 @@
from __future__ import absolute_import, division, print_function

from odo.backends.json import *
from odo.utils import tmpfile, ignoring
from odo import into
from odo.temp import Temp, _Temp
from contextlib import contextmanager
from datashape import dshape
import datetime
import os
import gzip
import os
import json

from contextlib import contextmanager

import numpy as np
from odo.backends.json import json_dumps
from odo.utils import tmpfile, ignoring
from odo import odo, discover, JSONLines, resource, JSON, convert, append, drop
from odo.temp import Temp, _Temp

from datashape import dshape


@contextmanager
def json_file(data):
with tmpfile('.json') as fn:
Expand All @@ -20,6 +25,7 @@ def json_file(data):

yield fn


@contextmanager
def jsonlines_file(data):
with tmpfile('.json') as fn:
Expand All @@ -34,11 +40,13 @@ def jsonlines_file(data):
# Two-record fixture used by the discover tests that follow.
dat = [
    {'name': 'Alice', 'amount': 100},
    {'name': 'Bob', 'amount': 200},
]


def test_discover_json():
    """discover() on a JSON file must agree with discover() on the raw records."""
    with json_file(dat) as path:
        from_file = discover(JSON(path))
        from_memory = discover(dat)
        assert from_file == from_memory


def test_discover_jsonlines():
with jsonlines_file(dat) as fn:
j = JSONLines(fn)
Expand Down Expand Up @@ -239,3 +247,18 @@ def test_drop():
assert os.path.exists(fn)
drop(js)
assert not os.path.exists(fn)


def test_missing_to_csv():
    """Send records with missing keys through JSON and on to CSV.

    Each record lacks one of the columns; the expected CSV text shows the
    absent values written as empty fields.
    """
    records = [dict(a=1, b=2), dict(a=2, c=4)]
    expected = 'a,b,c\n1,2.0,\n2,,4.0\n'

    with tmpfile('.json') as json_path:
        js = odo(records, JSON(json_path))

        # The CSV has to be read back while both temp files still exist.
        with tmpfile('.csv') as csv_path:
            csv_out = odo(js, csv_path)
            with open(csv_out.path, 'rt') as handle:
                result = handle.read()

    assert result == expected
9 changes: 4 additions & 5 deletions odo/convert.py
Expand Up @@ -159,16 +159,15 @@ def element_of(seq):
seq = seq[0]
return seq


@convert.register(np.ndarray, list, cost=10.0)
def list_to_numpy(seq, dshape=None, **kwargs):
    """Convert a list into a NumPy array with a dtype derived from ``dshape``.

    Parameters
    ----------
    seq : list
        Input values. If its elements are dicts they are first flattened to
        tuples via ``records_to_tuples``.
    dshape : datashape, optional
        Describes the data; passed to ``dshape_to_numpy`` to obtain the dtype.
    **kwargs
        Accepted for dispatch compatibility and ignored here.

    Returns
    -------
    numpy.ndarray
    """
    if isinstance(element_of(seq), dict):
        seq = list(records_to_tuples(dshape, seq))
    # Unhashable iterable rows (e.g. lists) are turned into tuples --
    # presumably because NumPy record dtypes expect one tuple per row.
    if (seq and isinstance(seq[0], Iterable) and not ishashable(seq[0]) and
            not isscalar(dshape)):
        seq = list(map(tuple, seq))
    return np.array(seq, dtype=dshape_to_numpy(dshape))


@convert.register(Iterator, list, cost=0.001)
Expand Down
14 changes: 14 additions & 0 deletions odo/tests/test_convert.py
Expand Up @@ -127,6 +127,20 @@ def test_list_to_numpy_on_dicts():
assert convert(list, x) == [('Alice', 100), ('Bob', 200)]


def test_list_of_dicts_with_missing_to_numpy():
    """Dict records that lack some keys should still convert to a record array."""
    records = [{'name': 'Alice', 'amount': 100},
               {'name': 'Bob'},
               {'amount': 200}]

    result = convert(np.ndarray, records)
    assert result.dtype.names == ('amount', 'name')

    expected = np.array([(100.0, 'Alice'),
                         (np.nan, 'Bob'),
                         (200.0, None)],
                        dtype=[('amount', 'float64'), ('name', 'O')])

    # NaN never compares equal to itself, so a position also counts as a
    # match when both sides are "not equal to themselves".
    same = result == expected
    both_missing = (result != result) & (expected != expected)
    assert np.all(same | both_missing)


def test_chunks_numpy_pandas():
x = np.array([('Alice', 100), ('Bob', 200)],
dtype=[('name', 'S7'), ('amount', 'i4')])
Expand Down
2 changes: 1 addition & 1 deletion odo/utils.py
Expand Up @@ -207,7 +207,7 @@ def records_to_tuples(ds, data):
if isinstance(ds, (str, unicode)):
ds = dshape(ds)
if isinstance(ds.measure, Record) and len(ds.shape) == 1:
return pluck(ds.measure.names, data)
return pluck(ds.measure.names, data, default=None)
if isinstance(ds.measure, Record) and len(ds.shape) == 0:
return get(ds.measure.names, data)
if not isinstance(ds.measure, Record):
Expand Down

0 comments on commit 2b56485

Please sign in to comment.