# Adding Settings to Normalize

This notebook tests a new feature for `normalize`: switches that control whether lists and dicts are expanded.

## Imports

In [1]:
import os
import sys
# Put the parent directory first on the module search path; presumably so that
# `flat_table` (imported in the next cell) resolves to the local checkout one
# level up rather than an installed copy — TODO confirm.
sys.path.insert(0, os.pardir)

In [2]:
import pandas as pd
from flat_table import mapper, normalize

## Sample Dataframe

In [51]:
# Sample records mixing scalar, dict, list-of-scalars and list-of-dicts values.
# Each tuple is (id, user_info, name_list, address, membership).
data = [
    (
        1001,
        { 'first_name': 'john', 'last_name': 'smith', 'phones': {'mobile': '201-..', 'home': '978-..'} },
        ['a', 'b'],
        [{ 'zip': '07014', 'city': 'clifton' }],
        { 'membership': True, 'memberid': '1231' }
    ),
    (
        1002,
        # Missing user_info. `pd.np` was deprecated in pandas 1.0 and removed
        # in 2.0, so use a plain float NaN (the same value np.nan holds).
        float('nan'),
        ['one', 'two', 'three', 'four'],
        [{'zip': '07014', 'address1': '1 Journal Square'}],
        { 'membership': False, 'memberid': '' }
    ),
    (
        1003,
        { 'first_name': 'marry', 'last_name': 'kate', 'gender': 'female' },
        ['w', 'y', 'z'],
        [{ 'zip': '10001', 'city': 'new york' }, { 'zip': '10008', 'city': 'brooklyn' }],
        { 'membership': True, 'memberid': '9754' }
    ),
]
df = pd.DataFrame(data, columns=['id', 'user_info', 'name_list', 'address', 'membership'])

In [52]:
# Preview the sample frame built above.
df.head()

Unnamed: 0,id,user_info,name_list,address,membership
0,1001,"{'first_name': 'john', 'last_name': 'smith', '...","[a, b]","[{'zip': '07014', 'city': 'clifton'}]","{'membership': True, 'memberid': '1231'}"
1,1002,,"[one, two, three, four]","[{'zip': '07014', 'address1': '1 Journal Squar...","{'membership': False, 'memberid': ''}"
2,1003,"{'first_name': 'marry', 'last_name': 'kate', '...","[w, y, z]","[{'zip': '10001', 'city': 'new york'}, {'zip':...","{'membership': True, 'memberid': '9754'}"


## Settings

In [53]:
# Build the mapper table for the sample frame and display it; the filters in
# the following cells select rows by its `parent`, `child` and `type` columns.
mp = mapper(df)
mp

Unnamed: 0,parent,child,type,obj
0,.,id,int,"0 1001 1 1002 2 1003 Name: id, dtype:..."
1,.,user_info,dict,"0 {'first_name': 'john', 'last_name': 'smit..."
2,user_info,user_info.gender,str,0 NaN 1 NaN 2 female Name: gend...
3,user_info,user_info.phones.home,str,0 978-.. 1 NaN 2 NaN Name: phon...
4,user_info,user_info.phones.mobile,str,0 201-.. 1 NaN 2 NaN Name: phon...
5,user_info,user_info.last_name,str,0 smith 1 NaN 2 kate Name: last_na...
6,user_info,user_info.first_name,str,0 john 1 NaN 2 marry Name: first_n...
7,.,name_list,list,"0 [a, b] 1 [one, two, t..."
8,,name_list,str,0 a 0 b 1 one 1 two 1 ...
9,.,address,list,"0 [{'zip': '07014', 'city': 'cl..."


### Find Original Df

In [54]:
# Rows whose parent is "." are the columns of the frame passed to the mapper.
is_top_level = mp["parent"] == "."
original_df = mp.loc[is_top_level]
original_df

Unnamed: 0,parent,child,type,obj
0,.,id,int,"0 1001 1 1002 2 1003 Name: id, dtype:..."
1,.,user_info,dict,"0 {'first_name': 'john', 'last_name': 'smit..."
7,.,name_list,list,"0 [a, b] 1 [one, two, t..."
9,.,address,list,"0 [{'zip': '07014', 'city': 'cl..."
14,.,membership,dict,"0 {'membership': True, 'memberid': '1231'} ..."


### Setting for Dicts

In [55]:
# Names of the top-level columns that the mapper typed as dict.
dict_items = original_df.loc[original_df["type"] == "dict", "child"]
dict_items

1      user_info
14    membership
Name: child, dtype: object

In [56]:
# Switches under test: whether dict columns and list columns should be
# expanded when selecting rows from the mapper table.
expand_dicts = False
expand_lists = True

if expand_dicts and expand_lists:
    # Expand everything: keep every mapper row that is neither dict- nor
    # list-typed.
    dataframe = mp[(mp.type != 'dict') & (mp.type != 'list')]
elif expand_dicts and not expand_lists:
    print('dict True list False')
    dict_items = mp[mp.type == 'dict'].child
    # NOTE(review): this applies the same "drop rows whose parent is a
    # dict-typed child" filter as the `not expand_dicts` branch below and keeps
    # list-typed rows alongside all remaining rows — confirm this is the
    # intended selection for expand_dicts=True / expand_lists=False.
    dataframe = mp[~mp.parent.isin(dict_items)]
elif not expand_dicts and expand_lists:
    print('dict False list True')
    dict_items = mp[mp.type == 'dict'].child
    # Keep dicts collapsed (drop rows whose parent is a dict-typed child) and
    # drop the list-typed rows themselves.
    dataframe = mp[(~mp.parent.isin(dict_items)) & (mp.type != 'list')]
else:
    # Nothing is expanded: fall back to the original top-level columns.
    # Previously this branch was `pass`, which left `dataframe` unbound on a
    # fresh kernel (NameError) or silently showed a stale value from an
    # earlier run of this cell.
    dataframe = mp[mp.parent == '.']
dataframe

dict False list True


Unnamed: 0,parent,child,type,obj
0,.,id,int,"0 1001 1 1002 2 1003 Name: id, dtype:..."
1,.,user_info,dict,"0 {'first_name': 'john', 'last_name': 'smit..."
8,,name_list,str,0 a 0 b 1 one 1 two 1 ...
10,,address,dict,"0 {'zip': '07014', 'city': 'cl..."
14,.,membership,dict,"0 {'membership': True, 'memberid': '1231'} ..."


In [57]:
# Names of the list-typed entries, then every mapper row sharing one of those
# names (the list row itself and the row produced from its elements).
list_items = mp.loc[mp["type"] == "list", "child"]
shares_list_name = mp["child"].isin(list_items)
mp[shares_list_name]

Unnamed: 0,parent,child,type,obj
7,.,name_list,list,"0 [a, b] 1 [one, two, t..."
8,,name_list,str,0 a 0 b 1 one 1 two 1 ...
9,.,address,list,"0 [{'zip': '07014', 'city': 'cl..."
10,,address,dict,"0 {'zip': '07014', 'city': 'cl..."
