BUG: Fix type coercion in read_json orient='table' (pandas-dev#21345) (pandas-dev#25219)
hksonngan · Mar 12, 2019 · 8fd5336 · 8fd5336
1 parent 2c41ebc
commit 8fd5336
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 6 deletions.
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -160,6 +160,7 @@ I/O
 ^^^
 
 - Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`)
+- Bug in :func:`read_json` with ``orient='table'`` where dtypes were inferred by default; inference is not applicable because the dtypes are already defined in the JSON schema (:issue:`21345`)
 -
 -
 -

diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
@@ -226,7 +226,7 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
         return serialized
 
 
-def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
+def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None,
               convert_axes=True, convert_dates=True, keep_default_dates=True,
               numpy=False, precise_float=False, date_unit=None, encoding=None,
               lines=False, chunksize=None, compression='infer'):
@@ -278,8 +278,15 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
 
     typ : type of object to recover (series or frame), default 'frame'
     dtype : boolean or dict, default True
-        If True, infer dtypes, if a dict of column to dtype, then use those,
+        If True, infer dtypes; if a dict of column to dtype, then use those;
         if False, then don't infer dtypes at all, applies only to the data.
+
+        Not applicable with ``orient='table'``.
+
+        .. versionchanged:: 0.25
+
+           Not applicable with ``orient='table'``.
+
     convert_axes : boolean, default True
         Try to convert the axes to the proper dtypes.
     convert_dates : boolean, default True
@@ -408,6 +415,11 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
                 {"index": "row 2", "col 1": "c", "col 2": "d"}]}'
     """
 
+    if orient == 'table' and dtype:
+        raise ValueError("cannot pass both dtype and orient='table'")
+
+    dtype = orient != 'table' if dtype is None else dtype
+
     compression = _infer_compression(path_or_buf, compression)
     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
         path_or_buf, encoding=encoding, compression=compression,
@@ -600,15 +612,15 @@ class Parser(object):
         'us': long(31536000000000),
         'ns': long(31536000000000000)}
 
-    def __init__(self, json, orient, dtype=True, convert_axes=True,
+    def __init__(self, json, orient, dtype=None, convert_axes=True,
                  convert_dates=True, keep_default_dates=False, numpy=False,
                  precise_float=False, date_unit=None):
         self.json = json
 
         if orient is None:
             orient = self._default_orient
-
         self.orient = orient
+
         self.dtype = dtype
 
         if orient == "split":

diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
@@ -502,12 +502,12 @@ class TestTableOrientReader(object):
     @pytest.mark.parametrize("vals", [
         {'ints': [1, 2, 3, 4]},
         {'objects': ['a', 'b', 'c', 'd']},
+        {'objects': ['1', '2', '3', '4']},
         {'date_ranges': pd.date_range('2016-01-01', freq='d', periods=4)},
         {'categoricals': pd.Series(pd.Categorical(['a', 'b', 'c', 'c']))},
         {'ordered_cats': pd.Series(pd.Categorical(['a', 'b', 'c', 'c'],
                                                   ordered=True))},
-        pytest.param({'floats': [1., 2., 3., 4.]},
-                     marks=pytest.mark.xfail),
+        {'floats': [1., 2., 3., 4.]},
         {'floats': [1.1, 2.2, 3.3, 4.4]},
         {'bools': [True, False, False, True]}])
     def test_read_json_table_orient(self, index_nm, vals, recwarn):

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -1202,6 +1202,21 @@ def test_data_frame_size_after_to_json(self):
 
         assert size_before == size_after
 
+    def test_from_json_to_json_table_dtypes(self):
+        # GH21345
+        expected = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
+        dfjson = expected.to_json(orient='table')
+        result = pd.read_json(dfjson, orient='table')
+        assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize('dtype', [True, {'b': int, 'c': int}])
+    def test_read_json_table_dtype_raises(self, dtype):
+        # GH21345
+        df = pd.DataFrame({'a': [1, 2], 'b': [3., 4.], 'c': ['5', '6']})
+        dfjson = df.to_json(orient='table')
+        with pytest.raises(ValueError):
+            pd.read_json(dfjson, orient='table', dtype=dtype)
+
     @pytest.mark.parametrize('data, expected', [
         (DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
             {'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),