# JSON examples and exercise
****
+ get familiar with packages for dealing with JSON
+ study examples with JSON strings and files 
+ work on exercise to be completed and submitted 
****
+ reference: http://pandas.pydata.org/pandas-docs/stable/io.html#io-json-reader
+ data source: http://jsonstudio.com/resources/
****

In [3]:
import pandas as pd

## imports for Python, Pandas

In [4]:
import json
from pandas.io.json import json_normalize

## JSON example, with string

+ demonstrates creation of normalized dataframes (tables) from nested JSON-like data (here a list of Python dicts)
+ source: http://pandas.pydata.org/pandas-docs/stable/io.html#normalization

In [5]:
# define the nested, JSON-like records used below: a list with one dict per state,
# each holding a nested 'info' dict and a list of 'counties' records
data = [
    {
        'state': 'Florida',
        'shortname': 'FL',
        'info': {'governor': 'Rick Scott'},
        'counties': [
            {'name': 'Dade', 'population': 12345},
            {'name': 'Broward', 'population': 40000},
            {'name': 'Palm Beach', 'population': 60000},
        ],
    },
    {
        'state': 'Ohio',
        'shortname': 'OH',
        'info': {'governor': 'John Kasich'},
        'counties': [
            {'name': 'Summit', 'population': 1234},
            {'name': 'Cuyahoga', 'population': 1337},
        ],
    },
]

In [6]:
# flatten the nested 'counties' records into a table with one row per county
json_normalize(data, record_path='counties')

Unnamed: 0,name,population
0,Dade,12345
1,Broward,40000
2,Palm Beach,60000
3,Summit,1234
4,Cuyahoga,1337


In [8]:
# one row per county again, this time also carrying the parent record's fields
# (state, shortname and the nested info -> governor value) on every row
json_normalize(
    data,
    record_path='counties',
    meta=['state', 'shortname', ['info', 'governor']],
)

Unnamed: 0,name,population,info.governor,state,shortname
0,Dade,12345,Rick Scott,Florida,FL
1,Broward,40000,Rick Scott,Florida,FL
2,Palm Beach,60000,Rick Scott,Florida,FL
3,Summit,1234,John Kasich,Ohio,OH
4,Cuyahoga,1337,John Kasich,Ohio,OH


****
## JSON example, with file

+ demonstrates reading in a json file as plain Python objects (nested lists and dicts) and as a table
+ uses small sample file containing data about projects funded by the World Bank 
+ data source: http://jsonstudio.com/resources/

In [9]:
# load the JSON file into plain Python objects (json.load returns the parsed
# structure, not a string); the with-block closes the file handle once parsing is done
with open('data/world_bank_projects_less.json') as json_file:
    sample_projects = json.load(json_file)
sample_projects

[{u'_id': {u'$oid': u'52b213b38594d8a2be17c780'},
  u'approvalfy': 1999,
  u'board_approval_month': u'November',
  u'boardapprovaldate': u'2013-11-12T00:00:00Z',
  u'borrower': u'FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA',
  u'closingdate': u'2018-07-07T00:00:00Z',
  u'country_namecode': u'Federal Democratic Republic of Ethiopia!$!ET',
  u'countrycode': u'ET',
  u'countryname': u'Federal Democratic Republic of Ethiopia',
  u'countryshortname': u'Ethiopia',
  u'docty': u'Project Information Document,Indigenous Peoples Plan,Project Information Document',
  u'envassesmentcategorycode': u'C',
  u'grantamt': 0,
  u'ibrdcommamt': 0,
  u'id': u'P129828',
  u'idacommamt': 130000000,
  u'impagency': u'MINISTRY OF EDUCATION',
  u'lendinginstr': u'Investment Project Financing',
  u'lendinginstrtype': u'IN',
  u'lendprojectcost': 550000000,
  u'majorsector_percent': [{u'Name': u'Education', u'Percent': 46},
   {u'Name': u'Education', u'Percent': 26},
   {u'Name': u'Public Administration, Law, and Ju

In [10]:
# read the same sample file directly into a Pandas dataframe and display it
sample_json_df = pd.read_json(path_or_buf='data/world_bank_projects_less.json')
sample_json_df

Unnamed: 0,_id,approvalfy,board_approval_month,boardapprovaldate,borrower,closingdate,country_namecode,countrycode,countryname,countryshortname,...,sectorcode,source,status,supplementprojectflg,theme1,theme_namecode,themecode,totalamt,totalcommamt,url
0,{u'$oid': u'52b213b38594d8a2be17c780'},1999,November,2013-11-12T00:00:00Z,FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA,2018-07-07T00:00:00Z,Federal Democratic Republic of Ethiopia!$!ET,ET,Federal Democratic Republic of Ethiopia,Ethiopia,...,"ET,BS,ES,EP",IBRD,Active,N,"{u'Percent': 100, u'Name': u'Education for all'}","[{u'code': u'65', u'name': u'Education for all'}]",65,130000000,130000000,http://www.worldbank.org/projects/P129828/ethi...
1,{u'$oid': u'52b213b38594d8a2be17c781'},2015,November,2013-11-04T00:00:00Z,GOVERNMENT OF TUNISIA,,Republic of Tunisia!$!TN,TN,Republic of Tunisia,Tunisia,...,"BZ,BS",IBRD,Active,N,"{u'Percent': 30, u'Name': u'Other economic man...","[{u'code': u'24', u'name': u'Other economic ma...",5424,0,4700000,http://www.worldbank.org/projects/P144674?lang=en


****
## JSON exercise

Using data in file 'data/world_bank_projects.json' and the techniques demonstrated above,
1. Find the 10 countries with most projects
2. Find the top 10 major project themes (using column 'mjtheme_namecode')
3. In 2. above you will notice that some entries have only the code and the name is missing. Create a dataframe with the missing names filled in.

In [111]:
# Load the full dataset and flatten it into `projects`, a normalized table of all the columns of the dataset
# (the with-block makes sure the file handle is closed as soon as the JSON has been parsed)
with open('data/world_bank_projects.json') as projects_file:
    data = json.load(projects_file)
projects = json_normalize(data)

# 1. Find the 10 countries with the most projects

# Count how often each value occurs in the 'countryshortname' column and display the ten
# most common values as (name, count) pairs, highest count first
from collections import Counter
country_counter = Counter(projects['countryshortname'])
country_counter.most_common(10)

[('China', 19),
 ('Indonesia', 19),
 ('Vietnam', 17),
 ('India', 16),
 ('Yemen, Republic of', 13),
 ('Morocco', 12),
 ('Nepal', 12),
 ('Bangladesh', 12),
 ('Mozambique', 11),
 ('Africa', 11)]

In [112]:
# 2. Find the top 10 major project themes (using column 'mjtheme_namecode')

# Every cell of the column holds a list of {'code': ..., 'name': ...} records;
# flatten those lists into one list containing the name of every theme of every project
mjthemes = projects['mjtheme_namecode']
themes = [theme['name'] for project_themes in mjthemes for theme in project_themes]

# Tally the collected names and display the ten most common themes with their counts
# (records whose name is missing are counted under the empty string '')
themes_counter = Counter(themes)
themes_counter.most_common(10)

[('Environment and natural resources management', 223),
 ('Rural development', 202),
 ('Human development', 197),
 ('Public sector governance', 184),
 ('Social protection and risk management', 158),
 ('Financial and private sector development', 130),
 ('', 122),
 ('Social dev/gender/inclusion', 119),
 ('Trade and integration', 72),
 ('Urban development', 47)]

In [119]:
# 3. Create a dataframe with the missing names filled in.

# Build a lookup from theme code to theme name, using only the records whose name is not blank
# (if a code occurs several times, the last non-blank name seen is the one kept)
theme_dict = {
    theme['code']: theme['name']
    for project_themes in projects['mjtheme_namecode']
    for theme in project_themes
    if theme['name'] != ''
}
            
# Helper that resolves a theme code to its name through theme_dict
def filler_in(code):
    """Return the theme name stored in theme_dict for `code`.

    Raises KeyError when `code` is not a key of theme_dict.
    """
    theme_name = theme_dict[code]
    return theme_name
    

# Walk through every theme record of every project and give each record with an empty
# name the name that belongs to its theme code. Iterating over the column directly yields
# the stored lists of dicts themselves, so the assignment updates the records in place and
# does not depend on the dataframe's index labels being 0..n-1.
for project_themes in projects['mjtheme_namecode']:
    for theme in project_themes:
        if theme['name'] == '':
            theme['name'] = filler_in(theme['code'])
        
# To check if empty string theme names have been filled in, rebuild the list of theme names
# the same way as in 2, count them again, and confirm the count for '' is 0
mjthemes = projects['mjtheme_namecode']
themes = [theme['name'] for project_themes in mjthemes for theme in project_themes]
themes_counter_2 = Counter(themes)
print(themes_counter_2[''] == 0)

# Display only the first rows of the updated dataframe instead of dumping the whole table
projects.head(10)

True


Unnamed: 0,_id.$oid,approvalfy,board_approval_month,boardapprovaldate,borrower,closingdate,country_namecode,countrycode,countryname,countryshortname,docty,envassesmentcategorycode,grantamt,ibrdcommamt,id,idacommamt,impagency,lendinginstr,lendinginstrtype,lendprojectcost,majorsector_percent,mjsector_namecode,mjtheme,mjtheme_namecode,mjthemecode,prodline,prodlinetext,productlinetype,project_abstract.cdata,project_name,projectdocs,projectfinancialtype,projectstatusdisplay,regionname,sector,sector1.Name,sector1.Percent,sector2.Name,sector2.Percent,sector3.Name,sector3.Percent,sector4.Name,sector4.Percent,sector_namecode,sectorcode,source,status,supplementprojectflg,theme1.Name,theme1.Percent,theme_namecode,themecode,totalamt,totalcommamt,url
0,52b213b38594d8a2be17c780,1999,November,2013-11-12T00:00:00Z,FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA,2018-07-07T00:00:00Z,Federal Democratic Republic of Ethiopia!$!ET,ET,Federal Democratic Republic of Ethiopia,Ethiopia,"Project Information Document,Indigenous People...",C,0,0,P129828,130000000,MINISTRY OF EDUCATION,Investment Project Financing,IN,550000000,"[{'Percent': 46, 'Name': 'Education'}, {'Perce...","[{'code': 'EX', 'name': 'Education'}, {'code':...",[Human development],"[{'code': '8', 'name': 'Human development'}, {...",811,PE,IBRD/IDA,L,The development objective of the Second Phase ...,Ethiopia General Education Quality Improvement...,"[{'DocDate': '28-AUG-2013', 'EntityID': '09022...",IDA,Active,Africa,"[{'Name': 'Primary education'}, {'Name': 'Seco...",Primary education,46,Secondary education,26.0,Public administration- Other social services,16.0,Tertiary education,12.0,"[{'code': 'EP', 'name': 'Primary education'}, ...","ET,BS,ES,EP",IBRD,Active,N,Education for all,100,"[{'code': '65', 'name': 'Education for all'}]",65,130000000,130000000,http://www.worldbank.org/projects/P129828/ethi...
1,52b213b38594d8a2be17c781,2015,November,2013-11-04T00:00:00Z,GOVERNMENT OF TUNISIA,,Republic of Tunisia!$!TN,TN,Republic of Tunisia,Tunisia,"Project Information Document,Integrated Safegu...",C,4700000,0,P144674,0,MINISTRY OF FINANCE,Specific Investment Loan,IN,5700000,"[{'Percent': 70, 'Name': 'Public Administratio...","[{'code': 'BX', 'name': 'Public Administration...","[Economic management, Social protection and ri...","[{'code': '1', 'name': 'Economic management'},...",16,RE,Recipient Executed Activities,L,,TN: DTF Social Protection Reforms Support,"[{'DocDate': '29-MAR-2013', 'EntityID': '00033...",OTHER,Active,Middle East and North Africa,[{'Name': 'Public administration- Other social...,Public administration- Other social services,70,General public administration sector,30.0,,,,,"[{'code': 'BS', 'name': 'Public administration...","BZ,BS",IBRD,Active,N,Other economic management,30,"[{'code': '24', 'name': 'Other economic manage...",5424,0,4700000,http://www.worldbank.org/projects/P144674?lang=en
2,52b213b38594d8a2be17c782,2014,November,2013-11-01T00:00:00Z,MINISTRY OF FINANCE AND ECONOMIC DEVEL,,Tuvalu!$!TV,TV,Tuvalu,Tuvalu,"Resettlement Plan,Environmental Assessment,Int...",B,0,0,P145310,6060000,MINISTRY OF TRANSPORT AND COMMUNICATIONS,Investment Project Financing,IN,6060000,"[{'Percent': 100, 'Name': 'Transportation'}]","[{'code': 'TX', 'name': 'Transportation'}]","[Trade and integration, Public sector governan...","[{'code': '5', 'name': 'Trade and integration'...",52116,PE,IBRD/IDA,L,,Tuvalu Aviation Investment Project - Additiona...,"[{'DocDate': '21-OCT-2013', 'EntityID': '00033...",IDA,Active,East Asia and Pacific,[{'Name': 'Rural and Inter-Urban Roads and Hig...,Rural and Inter-Urban Roads and Highways,100,,,,,,,"[{'code': 'TI', 'name': 'Rural and Inter-Urban...",TI,IBRD,Active,Y,Regional integration,46,"[{'code': '47', 'name': 'Regional integration'...",52812547,6060000,6060000,http://www.worldbank.org/projects/P145310?lang=en
3,52b213b38594d8a2be17c783,2014,October,2013-10-31T00:00:00Z,MIN. OF PLANNING AND INT'L COOPERATION,,Republic of Yemen!$!RY,RY,Republic of Yemen,"Yemen, Republic of","Procurement Plan,Project Information Document,...",C,1500000,0,P144665,0,LABOR INTENSIVE PUBLIC WORKS PROJECT PMU,Technical Assistance Loan,IN,1500000,"[{'Percent': 100, 'Name': 'Health and other so...","[{'code': 'JX', 'name': 'Health and other soci...","[Social dev/gender/inclusion, Social dev/gende...","[{'code': '7', 'name': 'Social dev/gender/incl...",77,RE,Recipient Executed Activities,L,,Gov't and Civil Society Organization Partnership,"[{'DocDate': '15-MAY-2013', 'EntityID': '00035...",OTHER,Active,Middle East and North Africa,[{'Name': 'Other social services'}],Other social services,100,,,,,,,"[{'code': 'JB', 'name': 'Other social services'}]",JB,IBRD,Active,N,Participation and civic engagement,50,"[{'code': '57', 'name': 'Participation and civ...",5957,0,1500000,http://www.worldbank.org/projects/P144665?lang=en
4,52b213b38594d8a2be17c784,2014,October,2013-10-31T00:00:00Z,MINISTRY OF FINANCE,2019-04-30T00:00:00Z,Kingdom of Lesotho!$!LS,LS,Kingdom of Lesotho,Lesotho,"Project Information Document,Integrated Safegu...",B,0,0,P144933,13100000,MINISTRY OF TRADE AND INDUSTRY,Investment Project Financing,IN,15000000,"[{'Percent': 50, 'Name': 'Industry and trade'}...","[{'code': 'YX', 'name': 'Industry and trade'},...","[Trade and integration, Financial and private ...","[{'code': '5', 'name': 'Trade and integration'...",54,PE,IBRD/IDA,L,The development objective of the Second Privat...,Second Private Sector Competitiveness and Econ...,"[{'DocDate': '06-SEP-2013', 'EntityID': '09022...",IDA,Active,Africa,[{'Name': 'General industry and trade sector'}...,General industry and trade sector,50,Other industry,40.0,SME Finance,10.0,,,"[{'code': 'YZ', 'name': 'General industry and ...","FH,YW,YZ",IBRD,Active,N,Export development and competitiveness,30,"[{'code': '45', 'name': 'Export development an...",4145,13100000,13100000,http://www.worldbank.org/projects/P144933/seco...
5,52b213b38594d8a2be17c785,2014,October,2013-10-31T00:00:00Z,REPUBLIC OF KENYA,,Republic of Kenya!$!KE,KE,Republic of Kenya,Kenya,"Integrated Safeguards Data Sheet,Project Infor...",C,0,0,P146161,10000000,,Investment Project Financing,IN,66400000,"[{'Percent': 100, 'Name': 'Health and other so...","[{'code': 'JX', 'name': 'Health and other soci...",[Social protection and risk management],"[{'code': '6', 'name': 'Social protection and ...",66,PE,IBRD/IDA,L,,Additional Financing for Cash Transfers for Or...,"[{'DocDate': '16-SEP-2013', 'EntityID': '09022...",IDA,Active,Africa,[{'Name': 'Other social services'}],Other social services,100,,,,,,,"[{'code': 'JB', 'name': 'Other social services'}]",JB,IBRD,Active,Y,Social safety nets,100,"[{'code': '54', 'name': 'Social safety nets'}]",54,10000000,10000000,http://www.worldbank.org/projects/P146161?lang=en
6,52b213b38594d8a2be17c786,2014,October,2013-10-29T00:00:00Z,GOVERNMENT OF INDIA,2019-06-30T00:00:00Z,Republic of India!$!IN,IN,Republic of India,India,"Project Appraisal Document,Procurement Plan,In...",A,0,500000000,P121185,0,MINISTRY OF ROAD TRANSPORT AND HIGHWAYS,Specific Investment Loan,IN,1146050000,"[{'Percent': 100, 'Name': 'Transportation'}]","[{'code': 'TX', 'name': 'Transportation'}]","[Public sector governance, Financial and priva...","[{'code': '2', 'name': 'Public sector governan...",24,PE,IBRD/IDA,L,The development objective of the project is to...,National Highways Interconnectivity Improvemen...,"[{'DocDate': '02-OCT-2013', 'EntityID': '00035...",IBRD,Active,South Asia,[{'Name': 'Rural and Inter-Urban Roads and Hig...,Rural and Inter-Urban Roads and Highways,100,,,,,,,"[{'code': 'TI', 'name': 'Rural and Inter-Urban...",TI,IBRD,Active,N,Administrative and civil service reform,20,"[{'code': '25', 'name': 'Administrative and ci...",3925,500000000,500000000,http://www.worldbank.org/projects/P121185/firs...
7,52b213b38594d8a2be17c787,2014,October,2013-10-29T00:00:00Z,PEOPLE'S REPUBLIC OF CHINA,,People's Republic of China!$!CN,CN,People's Republic of China,China,"Project Appraisal Document,Integrated Safeguar...",C,27280000,0,P127033,0,NATIONAL ENERGY ADMINISTRATION,Investment Project Financing,IN,27280000,"[{'Percent': 100, 'Name': 'Energy and mining'}]","[{'code': 'LX', 'name': 'Energy and mining'}]",[Environment and natural resources management],"[{'code': '11', 'name': 'Environment and natur...",118,GE,Global Environment Project,L,The development objective of the Second Phase ...,China Renewable Energy Scale-Up Program Phase II,"[{'DocDate': '18-SEP-2013', 'EntityID': '00045...",OTHER,Active,East Asia and Pacific,[{'Name': 'Other Renewable Energy'}],Other Renewable Energy,100,,,,,,,"[{'code': 'LR', 'name': 'Other Renewable Energ...",LR,IBRD,Active,N,Climate change,100,"[{'code': '81', 'name': 'Climate change'}]",81,0,27280000,http://www.worldbank.org/projects/P127033/chin...
8,52b213b38594d8a2be17c788,2014,October,2013-10-29T00:00:00Z,THE GOVERNMENT OF INDIA,2018-12-31T00:00:00Z,Republic of India!$!IN,IN,Republic of India,India,"Project Appraisal Document,Integrated Safeguar...",B,0,0,P130164,160000000,"PUBLIC WORKS DEPARTMANT, RAJASTHAN",Specific Investment Loan,IN,227000000,"[{'Percent': 100, 'Name': 'Transportation'}]","[{'code': 'TX', 'name': 'Transportation'}]",[Rural development],"[{'code': '10', 'name': 'Rural development'}, ...",107,PE,IBRD/IDA,L,The development objective of the Rajasthan Roa...,Rajasthan Road Sector Modernization Project,"[{'DocDate': '01-OCT-2013', 'EntityID': '00044...",IDA,Active,South Asia,[{'Name': 'Rural and Inter-Urban Roads and Hig...,Rural and Inter-Urban Roads and Highways,100,,,,,,,"[{'code': 'TI', 'name': 'Rural and Inter-Urban...",TI,IBRD,Active,N,Other rural development,87,"[{'code': '79', 'name': 'Other rural developme...",79,160000000,160000000,http://www.worldbank.org/projects/P130164/raja...
9,52b213b38594d8a2be17c789,2014,October,2013-10-29T00:00:00Z,THE KINGDOM OF MOROCCO,2014-12-31T00:00:00Z,Kingdom of Morocco!$!MA,MA,Kingdom of Morocco,Morocco,"Program Document,Project Information Document,...",,0,200000000,P130903,0,MINISTRY OF FINANCE,Development Policy Lending,AD,200000000,"[{'Percent': 34, 'Name': 'Public Administratio...","[{'code': 'BX', 'name': 'Public Administration...","[Public sector governance, Public sector gover...","[{'code': '2', 'name': 'Public sector governan...",222,PE,IBRD/IDA,L,The objective of this First Transparency and A...,MA Accountability and Transparency DPL,"[{'DocDate': '30-SEP-2013', 'EntityID': '00033...",IBRD,Active,Middle East and North Africa,[{'Name': 'General public administration secto...,General public administration sector,34,Central government administration,33.0,Public administration- Information and communi...,33.0,,,"[{'code': 'BZ', 'name': 'General public admini...","BM,BC,BZ",IBRD,Active,N,Other accountability/anti-corruption,33,"[{'code': '29', 'name': 'Other accountability/...",273029,200000000,200000000,http://www.worldbank.org/projects/P130903?lang=en
