Skip to content

Commit

Permalink
ENH: Added errors{'raise','ignore'} for keys not found in meta for js…
Browse files Browse the repository at this point in the history
…on_normalize

Author: dickreuter <dickreuter@yahoo.com>

Closes pandas-dev#14583 from dickreuter/json_normalize_enhancement and squashes the following commits:

701c140 [dickreuter] adjusted formatting
3c94206 [dickreuter] shortened lines to pass linting
2028924 [dickreuter] doc changes
d298588 [dickreuter] Fixed as instructed in pull request page
bcfbf18 [dickreuter] Avoids exception when pandas.io.json.json_normalize
  • Loading branch information
dickreuter authored and ischurov committed Dec 19, 2016
1 parent e8775aa commit 4d40d6c
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 2 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ Other enhancements
- The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value (:issue:`14154`)
- ``pd.DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`)
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)


.. _whatsnew_0200.api_breaking:

Expand Down
22 changes: 20 additions & 2 deletions pandas/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,9 @@ def nested_to_record(ds, prefix="", level=0):

def json_normalize(data, record_path=None, meta=None,
meta_prefix=None,
record_prefix=None):
record_prefix=None,
errors='raise'):

"""
"Normalize" semi-structured JSON data into a flat table
Expand All @@ -742,6 +744,13 @@ def json_normalize(data, record_path=None, meta=None,
If True, prefix records with dotted (?) path, e.g. foo.bar.field if
path to records is ['foo', 'bar']
meta_prefix : string, default None
errors : {'raise', 'ignore'}, default 'raise'
* ignore : will ignore KeyError if keys listed in meta are not
always present
* raise : will raise KeyError if keys listed in meta are not
always present
.. versionadded:: 0.20.0
Returns
-------
Expand Down Expand Up @@ -841,7 +850,16 @@ def _recursive_extract(data, path, seen_meta, level=0):
if level + 1 > len(val):
meta_val = seen_meta[key]
else:
meta_val = _pull_field(obj, val[level:])
try:
meta_val = _pull_field(obj, val[level:])
except KeyError as e:
if errors == 'ignore':
meta_val = np.nan
else:
raise \
KeyError("Try running with "
"errors='ignore' as key "
"%s is not always present", e)
meta_vals[key].append(meta_val)

records.extend(recs)
Expand Down
59 changes: 59 additions & 0 deletions pandas/io/tests/json/test_json_norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,65 @@ def test_nested_flattens(self):

self.assertEqual(result, expected)

def test_json_normalize_errors(self):
    """GH14583: check the ``errors`` option when a meta key is missing.

    The second trade record has no ``trade_version`` entry.  With
    ``errors='ignore'`` the normalized frame must still be produced (the
    missing cells compare equal to '' after ``fillna('')``); with
    ``errors='raise'`` a KeyError must propagate.
    """

    def _stocks():
        # Build fresh objects on every call so the two trade records do
        # not share any list or dict instances.
        return [
            {"symbol": "AAPL", "name": "Apple", "price": "0"},
            {"symbol": "GOOG", "name": "Google", "price": "0"},
        ]

    payload = {
        "Trades": [
            {"general": {"tradeid": 100,
                         "trade_version": 1,
                         "stocks": _stocks()}},
            # This record deliberately lacks "trade_version".
            {"general": {"tradeid": 100,
                         "stocks": _stocks()}},
        ]
    }

    # Arguments shared by the 'ignore' and the 'raise' invocation.
    common_kwargs = dict(
        data=payload['Trades'],
        record_path=[['general', 'stocks']],
        meta=[['general', 'tradeid'],
              ['general', 'trade_version']],
    )

    normalized = json_normalize(errors='ignore', **common_kwargs)
    expected = {
        'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
        'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
        'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
        'price': {0: '0', 1: '0', 2: '0', 3: '0'},
        'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'},
    }
    self.assertEqual(normalized.fillna('').to_dict(), expected)

    # Same input, but a missing meta key must now surface as KeyError.
    self.assertRaises(KeyError, json_normalize,
                      errors='raise', **common_kwargs)


if __name__ == '__main__':
    # Run this module's tests through nose when executed as a script:
    # verbose output, stop at the first failure, and drop into pdb on
    # errors and failures; exit=False keeps nose from calling sys.exit.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s']
    nose.runmodule(argv=nose_argv, exit=False)

0 comments on commit 4d40d6c

Please sign in to comment.