Navigation Menu

Skip to content

Commit

Permalink
BUG: fixed json_normalize for subrecords with NoneTypes (pandas-dev#20030)
Browse files Browse the repository at this point in the history

TST: additional coverage for the test cases from (pandas-dev#20030)

DOC: added changes to whatsnew/v0.23.0.txt (pandas-dev#20030)
  • Loading branch information
aerymilts committed Mar 18, 2018
1 parent 670c2e4 commit 326107c
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Expand Up @@ -979,6 +979,7 @@ I/O
- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`)
- Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`)
- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`)
- Bug in :meth:`pandas.io.json.json_normalize` where subrecords were not properly normalized if any subrecord value was ``None`` (:issue:`20030`)

Plotting
^^^^^^^^
Expand Down
5 changes: 4 additions & 1 deletion pandas/io/json/normalize.py
Expand Up @@ -80,6 +80,8 @@ def nested_to_record(ds, prefix="", sep=".", level=0):
if level != 0: # so we skip copying for top level, common case
v = new_d.pop(k)
new_d[newkey] = v
if v is None: # pop the key if the value is None
new_d.pop(k)
continue
else:
v = new_d.pop(k)
Expand Down Expand Up @@ -189,7 +191,8 @@ def _pull_field(js, spec):
data = [data]

if record_path is None:
if any(isinstance(x, dict) for x in compat.itervalues(data[0])):
if any([[isinstance(x, dict)
for x in compat.itervalues(y)] for y in data]):
# naive normalization, this is idempotent for flat records
# and potentially will inflate the data considerably for
# deeply nested structures:
Expand Down
50 changes: 50 additions & 0 deletions pandas/tests/io/json/test_normalize.py
@@ -1,6 +1,7 @@
import pytest
import numpy as np
import json
import math

import pandas.util.testing as tm
from pandas import compat, Index, DataFrame
Expand Down Expand Up @@ -54,6 +55,17 @@ def state_data():
'state': 'Ohio'}]


@pytest.fixture
def author_missing_data():
    """Return two author records with mismatched sub-records.

    The first record has ``info`` set to ``None`` and no ``author_name``
    key; the second carries both ``info`` and ``author_name`` as nested
    dicts.
    """
    record_without_info = {'info': None}
    record_with_info = {
        'info': {'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
        'author_name': {'first': 'Jane', 'last_name': 'Doe'},
    }
    return [record_without_info, record_with_info]


class TestJSONNormalize(object):

def test_simple_records(self):
Expand Down Expand Up @@ -226,6 +238,21 @@ def test_non_ascii_key(self):
result = json_normalize(json.loads(testjson))
tm.assert_frame_equal(result, expected)

def test_missing_field(self, author_missing_data):
    """Check that a ``None``/absent sub-record normalizes to NaN columns.

    The fixture's first record has ``info`` set to ``None`` and no
    ``author_name``; after ``json_normalize`` every flattened column of
    that row is expected to be NaN, while the second row keeps its
    values.
    """
    result = json_normalize(author_missing_data)
    # Use np.nan rather than math.nan: math.nan only exists on
    # Python 3.5+, whereas np.nan works on every supported interpreter.
    ex_data = [
        {'author_name.first': np.nan,
         'author_name.last_name': np.nan,
         'info.created_at': np.nan,
         'info.last_updated': np.nan},
        {'author_name.first': 'Jane',
         'author_name.last_name': 'Doe',
         'info.created_at': '11/08/1993',
         'info.last_updated': '26/05/2012'}
    ]
    expected = DataFrame(ex_data)
    tm.assert_frame_equal(result, expected)


class TestNestedToRecord(object):

Expand Down Expand Up @@ -322,3 +349,26 @@ def test_json_normalize_errors(self):
['general', 'trade_version']],
errors='raise'
)

def test_nonetype_dropping(self):
    """Check that ``nested_to_record`` drops a top-level key whose value
    is ``None`` while still flattening the nested dicts around it."""
    info = {'created_at': '11/08/1993', 'last_updated': '26/05/2012'}
    records = [
        {'info': None,
         'author_name': {'first': 'Smith', 'last_name': 'Appleseed'}},
        {'info': info,
         'author_name': {'first': 'Jane', 'last_name': 'Doe'}},
    ]

    flattened = nested_to_record(records)

    # The first record has no 'info.*' keys at all: the None entry is
    # expected to be removed rather than kept as 'info': None.
    smith_row = {
        'author_name.first': 'Smith',
        'author_name.last_name': 'Appleseed',
    }
    jane_row = {
        'author_name.first': 'Jane',
        'author_name.last_name': 'Doe',
        'info.created_at': '11/08/1993',
        'info.last_updated': '26/05/2012',
    }
    wanted = [smith_row, jane_row]

    assert flattened == wanted

0 comments on commit 326107c

Please sign in to comment.