Skip to content

Commit

Permalink
BUG: Fix type coercion in read_json orient='table' (pandas-dev#21345)
Browse files Browse the repository at this point in the history
  • Loading branch information
albertvillanova authored and haison committed Mar 12, 2019
1 parent 2c41ebc commit 8fd5336
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ I/O
^^^

- Fixed bug where text was missing when using :meth:`to_clipboard` to copy utf-16 characters in Python 3 on Windows (:issue:`25040`)
- Bug in :func:`read_json` for ``orient='table'`` where dtypes were inferred by default, which is not applicable because the dtypes are already defined in the JSON schema (:issue:`21345`)
-
-
-
Expand Down
20 changes: 16 additions & 4 deletions pandas/io/json/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
return serialized


def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None,
convert_axes=True, convert_dates=True, keep_default_dates=True,
numpy=False, precise_float=False, date_unit=None, encoding=None,
lines=False, chunksize=None, compression='infer'):
Expand Down Expand Up @@ -278,8 +278,15 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
typ : type of object to recover (series or frame), default 'frame'
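dtype : boolean or dict, default None
When left as None, behaves as True for every ``orient`` except ``'table'``, where it behaves as False.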
If True, infer dtypes, if a dict of column to dtype, then use those,
If True, infer dtypes; if a dict of column to dtype, then use those;
if False, then don't infer dtypes at all, applies only to the data.
Not applicable with ``orient='table'``.
.. versionchanged:: 0.25
Not applicable with ``orient='table'``.
convert_axes : boolean, default True
Try to convert the axes to the proper dtypes.
convert_dates : boolean, default True
Expand Down Expand Up @@ -408,6 +415,11 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
{"index": "row 2", "col 1": "c", "col 2": "d"}]}'
"""

if orient == 'table' and dtype:
raise ValueError("cannot pass both dtype and orient='table'")

dtype = orient != 'table' if dtype is None else dtype

compression = _infer_compression(path_or_buf, compression)
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
path_or_buf, encoding=encoding, compression=compression,
Expand Down Expand Up @@ -600,15 +612,15 @@ class Parser(object):
'us': long(31536000000000),
'ns': long(31536000000000000)}

def __init__(self, json, orient, dtype=True, convert_axes=True,
def __init__(self, json, orient, dtype=None, convert_axes=True,
convert_dates=True, keep_default_dates=False, numpy=False,
precise_float=False, date_unit=None):
self.json = json

if orient is None:
orient = self._default_orient

self.orient = orient

self.dtype = dtype

if orient == "split":
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/json/test_json_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,12 +502,12 @@ class TestTableOrientReader(object):
@pytest.mark.parametrize("vals", [
{'ints': [1, 2, 3, 4]},
{'objects': ['a', 'b', 'c', 'd']},
{'objects': ['1', '2', '3', '4']},
{'date_ranges': pd.date_range('2016-01-01', freq='d', periods=4)},
{'categoricals': pd.Series(pd.Categorical(['a', 'b', 'c', 'c']))},
{'ordered_cats': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
ordered=True))},
pytest.param({'floats': [1., 2., 3., 4.]},
marks=pytest.mark.xfail),
{'floats': [1., 2., 3., 4.]},
{'floats': [1.1, 2.2, 3.3, 4.4]},
{'bools': [True, False, False, True]}])
def test_read_json_table_orient(self, index_nm, vals, recwarn):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,6 +1202,21 @@ def test_data_frame_size_after_to_json(self):

assert size_before == size_after

def test_from_json_to_json_table_dtypes(self):
    """Round-trip an int/float/str frame through ``orient='table'`` JSON.

    The frame read back must compare equal to the frame that was
    serialized, including the string column whose values look numeric.
    """
    # GH21345
    frame = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
    round_tripped = pd.read_json(frame.to_json(orient='table'),
                                 orient='table')
    assert_frame_equal(round_tripped, frame)

@pytest.mark.parametrize('dtype', [True, {'b': int, 'c': int}])
def test_read_json_table_dtype_raises(self, dtype):
    """Check that ``read_json`` rejects ``dtype`` with ``orient='table'``.

    Parameters
    ----------
    dtype : bool or dict
        A truthy ``dtype`` argument; each parametrized value must make
        ``read_json`` raise ``ValueError``.
    """
    # GH21345
    df = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
    dfjson = df.to_json(orient='table')
    # Pin the message so an unrelated ValueError raised while parsing
    # cannot make this test pass by accident.
    msg = "cannot pass both dtype and orient='table'"
    with pytest.raises(ValueError, match=msg):
        pd.read_json(dfjson, orient='table', dtype=dtype)

@pytest.mark.parametrize('data, expected', [
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
Expand Down

0 comments on commit 8fd5336

Please sign in to comment.