In [1]:
import pandas as pd
import json
import joblib
import numpy as np

In [2]:
data = { 
    'company': 'XYZ pvt ltd', 
    'location': 'London', 
    'info': { 
        'total':[0,1,2,3,4,5,6,7,8,9],
        'president': 'Rakesh Kapoor', 
        'contacts': { 
            'email': 'contact@xyz.com', 
            'tel': '9876543210'
        }
    }
}

In [3]:
pd.json_normalize(data)

Unnamed: 0,company,location,info.total,info.president,info.contacts.email,info.contacts.tel
0,XYZ pvt ltd,London,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]",Rakesh Kapoor,contact@xyz.com,9876543210


In [4]:
data = [ 
    { 
        'id': '001', 
        'company': 'XYZ pvt ltd', 
        'location': 'London', 
        'info': { 
            'president': 'Rakesh Kapoor', 
            'contacts': { 
                    'email': 'contact@xyz.com', 
                    'tel': '9876543210'
            } 
        } 
    }, 
    { 
        'id': '002', 
        'company': 'PQR Associates', 
        'location': 'Abu Dhabi', 
        'info': { 
            'president': 'Neelam Subramaniyam', 
            'contacts': { 
                    'email': 'contact@pqr.com', 
                    'tel': '8876443210'
            } 
        } 
    } 
] 

pd.json_normalize(data)

Unnamed: 0,id,company,location,info.president,info.contacts.email,info.contacts.tel
0,1,XYZ pvt ltd,London,Rakesh Kapoor,contact@xyz.com,9876543210
1,2,PQR Associates,Abu Dhabi,Neelam Subramaniyam,contact@pqr.com,8876443210


In [5]:
data = { 
    'company': 'XYZ pvt ltd', 
    'location': 'London', 
    'info': { 
        'president': 'Rakesh Kapoor', 
        'contacts': { 
            'email': 'contact@xyz.com', 
            'tel': '9876543210'
        } 
    }, 
    'employees': [ 
        {'name': 'A'}, 
        {'name': 'B'}, 
        {'name': 'C'} 
    ] 
} 
df = pd.json_normalize(data) 
df

Unnamed: 0,company,location,employees,info.president,info.contacts.email,info.contacts.tel
0,XYZ pvt ltd,London,"[{'name': 'A'}, {'name': 'B'}, {'name': 'C'}]",Rakesh Kapoor,contact@xyz.com,9876543210


In [6]:
df = pd.json_normalize(data, record_path=['employees'], meta=[ 
                  'company', 'location', ['info', 'president']]) 
df

Unnamed: 0,name,company,location,info.president
0,A,XYZ pvt ltd,London,Rakesh Kapoor
1,B,XYZ pvt ltd,London,Rakesh Kapoor
2,C,XYZ pvt ltd,London,Rakesh Kapoor


In [109]:
data = [ 
    { 
        'id': '001', 
        'company': 'XYZ pvt ltd', 
        'location': 'London', 
        'info': { 
            'president': 'Rakesh Kapoor', 
            'contacts': { 
                    'email': 'contact@xyz.com', 
                    'tel': '9876543210'
            } 
        }, 
        'employees': [ 
            {'name': 'A'}, 
            {'name': 'B'}, 
            {'name': 'C'} 
        ] 
    }, 
    { 
        'id': '002', 
        'company': 'PQR Associates', 
        'location': 'Abu Dhabi', 
        'info': { 
            'president': 'Neelam Subramaniyam', 
            'contacts': { 
                    'email': 'contact@pqr.com', 
                    'tel': '8876443210'
            } 
        }, 
        'employees': [ 
            {'name': 'L'}, 
            {'name': 'M'}, 
            {'name': 'N'} 
        ] 
    } 
] 
  
df = pd.json_normalize(data, record_path=['employees'], meta=[ 
                       'company', 'location', ['info', 'president',]]) 
df

Unnamed: 0,name,company,location,info.president
0,A,XYZ pvt ltd,London,Rakesh Kapoor
1,B,XYZ pvt ltd,London,Rakesh Kapoor
2,C,XYZ pvt ltd,London,Rakesh Kapoor
3,L,PQR Associates,Abu Dhabi,Neelam Subramaniyam
4,M,PQR Associates,Abu Dhabi,Neelam Subramaniyam
5,N,PQR Associates,Abu Dhabi,Neelam Subramaniyam


In [110]:
def get_dict_cols(data):
    to_parse = []
    for col in data:
        if any([isinstance(i, dict) for i in data[col].values]):
            to_parse.append(col)
    return to_parse

def get_list_cols(data):
    to_parse = []
    for col in data:
        if any([isinstance(i, list) for i in data[col].values]):
            to_parse.append(col)
    return to_parse



In [140]:
pd.json_normalize(data, record_path=['employees'], meta=['id','company','location',
                                                         ['info','president'],
                                                         ['info','contacts','email'],
                                                        ['info','contacts','tel']])

Unnamed: 0,name,id,company,location,info.president,info.contacts.email,info.contacts.tel
0,A,1,XYZ pvt ltd,London,Rakesh Kapoor,contact@xyz.com,9876543210
1,B,1,XYZ pvt ltd,London,Rakesh Kapoor,contact@xyz.com,9876543210
2,C,1,XYZ pvt ltd,London,Rakesh Kapoor,contact@xyz.com,9876543210
3,L,2,PQR Associates,Abu Dhabi,Neelam Subramaniyam,contact@pqr.com,8876443210
4,M,2,PQR Associates,Abu Dhabi,Neelam Subramaniyam,contact@pqr.com,8876443210
5,N,2,PQR Associates,Abu Dhabi,Neelam Subramaniyam,contact@pqr.com,8876443210


In [143]:
def parse_nested_dataframe(dataframe, orig_json):
    list_cols = get_list_cols(dataframe)
    tmp = dataframe.copy()
    print('lists:', list_cols)
    if len(list_cols) > 0:
        meta_cols = [i.split('.') if '.' in i else i for i in tmp]
        meta_cols = [i for i in meta_cols if i not in list_cols]
        tmp = pd.json_normalize(data, record_path=list_cols, meta=meta_cols)
        tmp = parse_nested_dataframe(tmp, orig_json)
    return tmp


In [167]:
def parse_nested_dataframe(dataframe, orig_json):
    list_cols = get_list_cols(dataframe)
    list_cols = [i for i in list_cols if '.' not in i]
    tmp = dataframe.copy()
    print('lists:', list_cols)
    if len(list_cols) > 0:
        meta_cols = [i.split('.') if '.' in i else i for i in tmp]
        meta_cols = [i for i in meta_cols if i not in list_cols]
        tmp = pd.json_normalize(data, record_path=list_cols, meta=meta_cols)
        tmp = parse_nested_dataframe(tmp, orig_json)
    list_cols = get_list_cols(tmp)
    if len(list_cols) > 0:
        tmp = pd.concat([tmp, pd.json_normalize(tmp[list_cols])])
    return tmp


In [168]:
data = [ 
    { 
        'id': '001', 
        'company': 'XYZ pvt ltd', 
        'location': 'London', 
        'info': { 
            'president': 'Rakesh Kapoor', 
            'contacts': { 
                    'email': 'contact@xyz.com', 
                    'tel': '9876543210',
                    'preferences':[{'fruits':'orange'},{'time':'noon'}],
            } 
        }, 
        'employees': [ 
            {'name': 'A'}, 
            {'name': 'B'}, 
            {'name': 'C'} 
        ] 
    }, 
    { 
        'id': '002', 
        'company': 'PQR Associates', 
        'location': 'Abu Dhabi', 
        'info': { 
            'president': 'Neelam Subramaniyam', 
            'contacts': { 
                    'email': 'contact@pqr.com', 
                    'tel': '8876443210',
                    'preferences':[{'fruits':'lime'},{'time':'evening'}]
            } 
        }, 
        'employees': [ 
            {'name': 'L'}, 
            {'name': 'M'}, 
            {'name': 'N'} 
        ] 
    } 
] 

In [169]:
norm = pd.json_normalize(data)
norm

Unnamed: 0,id,company,location,employees,info.president,info.contacts.email,info.contacts.tel,info.contacts.preferences
0,1,XYZ pvt ltd,London,"[{'name': 'A'}, {'name': 'B'}, {'name': 'C'}]",Rakesh Kapoor,contact@xyz.com,9876543210,"[{'fruits': 'orange'}, {'time': 'noon'}]"
1,2,PQR Associates,Abu Dhabi,"[{'name': 'L'}, {'name': 'M'}, {'name': 'N'}]",Neelam Subramaniyam,contact@pqr.com,8876443210,"[{'fruits': 'lime'}, {'time': 'evening'}]"


In [170]:
parse_nested_dataframe(norm, data)

lists: ['employees']
lists: []


Unnamed: 0,name,id,company,location,info.president,info.contacts.email,info.contacts.tel,info.contacts.preferences
0,A,1.0,XYZ pvt ltd,London,Rakesh Kapoor,contact@xyz.com,9876543210.0,"[{'fruits': 'orange'}, {'time': 'noon'}]"
1,B,1.0,XYZ pvt ltd,London,Rakesh Kapoor,contact@xyz.com,9876543210.0,"[{'fruits': 'orange'}, {'time': 'noon'}]"
2,C,1.0,XYZ pvt ltd,London,Rakesh Kapoor,contact@xyz.com,9876543210.0,"[{'fruits': 'orange'}, {'time': 'noon'}]"
3,L,2.0,PQR Associates,Abu Dhabi,Neelam Subramaniyam,contact@pqr.com,8876443210.0,"[{'fruits': 'lime'}, {'time': 'evening'}]"
4,M,2.0,PQR Associates,Abu Dhabi,Neelam Subramaniyam,contact@pqr.com,8876443210.0,"[{'fruits': 'lime'}, {'time': 'evening'}]"
5,N,2.0,PQR Associates,Abu Dhabi,Neelam Subramaniyam,contact@pqr.com,8876443210.0,"[{'fruits': 'lime'}, {'time': 'evening'}]"
0,,,,,,,,
0,,,,,,,,
