<a href="https://colab.research.google.com/github/YogeshThakare007/World-Bank-Data-EDA/blob/main/World%20Bank%20EDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Exploratory Data Analysis on World Bank Data
The objective of this project is to collect data from the World Bank Open APIs and prepare and analyse the data using Python.


In [1]:
import pandas as pd
import numpy as np
import requests

from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Root of every World Bank API endpoint requested by this notebook
BASE_URL = 'http://api.worldbank.org/v2/'

# World Bank indicator code -> readable feature (column) name.
# Insertion order matters: INDICATOR_CODES below is derived from it.
featureMap = {
    # population
    "SP.POP.TOTL": "Total Population",
    "SP.POP.TOTL.FE.IN": "Female Population",
    "SP.POP.TOTL.MA.IN": "Male Population",
    "SP.DYN.CBRT.IN": "Birth Rate",
    "SP.DYN.CDRT.IN": "Death Rate",
    # education
    "SE.COM.DURS": "Compulsory Education Dur.",
    # employment
    "SL.IND.EMPL.ZS": "Employment in Industry(%)",
    "SL.AGR.EMPL.ZS": "Employment in Agriculture(%)",
    "SL.AGR.EMPL.FE.ZS": "Female Employment in Agriculture(%)",
    "SL.IND.EMPL.FE.ZS": "Female Employment in Industry(%)",
    "SL.UEM.TOTL.ZS": "Unemployment(%)",
    # economy
    "NY.GDP.MKTP.CD": "GDP in USD",
    "NY.ADJ.NNTY.PC.KD.ZG": "National Income per Capita",
    "NY.GSR.NFCY.CD": "Net income from Abroad",
    "NV.AGR.TOTL.CD": "Agriculture value added(in USD)",
    # energy
    "EG.USE.ELEC.KH.PC": "Electric Power Consumption(kWH per capita)",
    "EG.FEC.RNEW.ZS": "Renewable Energy Consumption (%)",
    "EG.USE.COMM.FO.ZS": "Fossil Fuel Consumption (%)",
}

# Indicator codes to request, in the same order as the entries of featureMap
INDICATOR_CODES = list(featureMap)

# Two-letter country code -> country name used for display and in the 'Country' column
countryMap = {
    "US": "USA",
    "IN": "India",
    "CN": "China",
    "JP": "Japan",
    "CA": "Canada",
    "GB": "Great Britain",
    "ZA": "South Africa",
}

# Names of the countries under consideration, in the same order as countryMap
COUNTRY_LIST = list(countryMap.values())

# Constant query parameters sent with every request:
#   format   - ensures we receive a JSON response
#   per_page - the requested range covers 59 years, so the page size is raised
#              from the default of 50 to 100 and one API call per feature suffices
#   date     - range of years for which the data is needed
params = {
    'format': 'json',
    'per_page': '100',
    'date': '1960:2018',
}

In [5]:
# Function to get JSON data from the endpoint
def loadJSONData(country_code, timeout=30):
    """Fetch every indicator in INDICATOR_CODES for one country from the World Bank API.

    Parameters
    ----------
    country_code : str
        Country code; it is lower-cased and placed into the request URL (e.g. 'US').
    timeout : float, optional
        Seconds to wait on each HTTP request before ``requests`` gives up.
        Defaults to 30.

    Returns
    -------
    list of list
        One inner list per indicator that loaded successfully, in INDICATOR_CODES
        order, holding the values as floats (``None`` where the API reported an
        empty value) in the order the API returned them. The last inner list is
        the years 2018 down to 1960.

    Raises
    ------
    requests.exceptions.RequestException
        If a request cannot be completed (including exceeding ``timeout``).

    Notes
    -----
    An indicator whose request fails is reported with ``print`` and skipped, so
    the result then contains fewer feature lists than INDICATOR_CODES has entries.
    """
    dataList = []

    # iterate over each indicator code specified in the constant INDICATOR_CODES
    for indicator in INDICATOR_CODES:

        # form the URL in the desired format
        # E.g: http://api.worldbank.org/v2/countries/us/indicators/SP.POP.TOTL?format=json&per_page=100&date=1960:2018
        url = BASE_URL + 'countries/' + country_code.lower() + '/indicators/' + indicator

        # send the request; the timeout keeps one stalled connection from
        # blocking the whole notebook run indefinitely
        response = requests.get(url, params=params, timeout=timeout)

        # Parse the body once and reuse it (only when the status is 200, as before).
        payload = response.json() if response.status_code == 200 else None

        # The API returns a status_code 200 even for error messages,
        # however, the response body then contains a field called "message"
        # that includes the details of the error.
        if payload is not None and "message" not in payload[0]:

            # the response is an array containing two arrays -
            # [[{page: 1, ...}], [{year: 2018, SP.POP.TOTL: 123455}, ...]]
            # hence we check if the length of the response is >1
            if len(payload) > 1:
                # each object gives one single value for one year;
                # empty values become None, everything else is converted to float.
                # Equality (not identity) is used for the empty-string check.
                indicatorVals = [
                    None if obj['value'] is None or obj['value'] == "" else float(obj['value'])
                    for obj in payload[1]
                ]
                dataList.append(indicatorVals)
        else:
            # print an error message if the API call failed
            print("Error in Loading the data. Status Code: " + str(response.status_code))

    # Once all the features have been obtained, add the values for the "Year".
    # The API returns the indicator values from the most recent year first,
    # hence the list of years is created in reverse order.
    dataList.append(list(range(2018, 1959, -1)))

    # list of lists of feature values [[val1,val2,...], [val1,val2,...], ...]
    return dataList

#----------------------------------------------------------------------------------------------------

# Function to invoke loadJSONData and build the final DataFrame for each country
def getCountrywiseDF(country_code):
    """Load all features for one country and return them as a DataFrame.

    Parameters
    ----------
    country_code : str
        Key into countryMap (e.g. 'US'); it is also passed on to loadJSONData.

    Returns
    -------
    pandas.DataFrame
        One column per entry of featureMap (named by its values, in insertion
        order), followed by 'Year' and 'Country'. The first rows are also
        displayed as a side effect.

    Notes
    -----
    Column names are matched to the fetched lists purely by position, so the
    order of featureMap has to agree with the order in which loadJSONData
    returns the features.
    """
    # Meaningful column names: every feature name, then the year column
    column_names = [*featureMap.values(), 'Year']

    country_name = countryMap[country_code]
    print(f"------------------Loading data for: {country_name}-----------------------")

    # Fetch the list of per-feature value lists from the API
    feature_columns = loadJSONData(country_code)

    # np.column_stack places each inner list side by side as one column
    stacked = np.column_stack(feature_columns)
    country_df = pd.DataFrame(stacked, columns=column_names)

    # Tag every row with the country's name
    country_df['Country'] = country_name

    # Show a preview of the resulting frame
    display(country_df.head())

    return country_df
# Build a separate DataFrame for each country under consideration - 7 data frames.
# getCountrywiseDF is called once per country code, in the order listed, and each
# result is bound to its own <code>_df name.
US_df, IN_df, CN_df, JP_df, CA_df, GB_df, ZA_df = [
    getCountrywiseDF(code)
    for code in ('US', 'IN', 'CN', 'JP', 'CA', 'GB', 'ZA')
]

print("Data Loading Completed")


  if obj['value'] is "" or obj['value'] is None:


------------------Loading data for: USA-----------------------


Unnamed: 0,Total Population,Female Population,Male Population,Birth Rate,Death Rate,Compulsory Education Dur.,Employment in Industry(%),Employment in Agriculture(%),Female Employment in Agriculture(%),Female Employment in Industry(%),Unemployment(%),GDP in USD,National Income per Capita,Net income from Abroad,Agriculture value added(in USD),Electric Power Consumption(kWH per capita),Renewable Energy Consumption (%),Fossil Fuel Consumption (%),Year,Country
0,326838199.0,164926348.0,161911851.0,11.6,8.678,12.0,19.56929,1.708719,0.975559,8.580613,3.9,20533057312000.0,1.83305,290307000000.0,185598437000.0,,10.12,,2018,USA
1,325122128.0,164151818.0,160970309.0,11.8,8.638,12.0,19.42705,1.768335,0.961346,8.535977,4.36,19477336549000.0,2.135014,292949000000.0,184424799000.0,,9.92,,2017,USA
2,323071755.0,163224028.0,159847727.0,12.2,8.493,12.0,19.47486,1.781334,0.97919,8.532497,4.87,18695110842000.0,-0.111701,231944000000.0,176228444000.0,,9.46,,2016,USA
3,320738994.0,162158414.0,158580581.0,12.4,8.44,12.0,19.55526,1.781391,0.969498,8.661249,5.28,18206020741000.0,3.214351,220383000000.0,188238199000.0,,9.03,82.427828,2015,USA
4,318386329.0,161084758.0,157301571.0,12.5,8.237,12.0,19.70807,1.661639,0.888076,8.714112,6.17,17550680174000.0,2.182302,235226000000.0,205705398000.0,12993.965579,9.22,83.089042,2014,USA


------------------Loading data for: India-----------------------


Unnamed: 0,Total Population,Female Population,Male Population,Birth Rate,Death Rate,Compulsory Education Dur.,Employment in Industry(%),Employment in Agriculture(%),Female Employment in Agriculture(%),Female Employment in Industry(%),Unemployment(%),GDP in USD,National Income per Capita,Net income from Abroad,Agriculture value added(in USD),Electric Power Consumption(kWH per capita),Renewable Energy Consumption (%),Fossil Fuel Consumption (%),Year,Country
0,1369003306.0,661854076.0,707149230.0,17.651,6.644,8.0,24.9491,43.32882,55.49442,17.34357,7.65,2702929718960.46,4.695242,-28935365554.060295,433323710255.152,,32.82,,2018,India
1,1354195680.0,654607791.0,699587889.0,17.911,6.593,8.0,24.84621,43.93962,56.25889,17.63662,7.733,2651472946375.05,7.15274,-28673171869.012295,439039677566.795,,32.41,,2017,India
2,1338636340.0,647012921.0,691623419.0,18.514,6.603,8.0,24.71281,44.52136,56.95825,17.88424,7.842,2294797980509.01,6.265538,-46857856463.2037,375516164816.91296,,33.02,,2016,India
3,1322866505.0,639323292.0,683543213.0,18.765,6.67,8.0,24.58389,45.15931,57.70898,18.14038,7.915,2103587813812.75,7.580204,-24405488069.835094,340244981475.06,,33.4,,2015,India
4,1307246509.0,631697152.0,675549357.0,19.049,6.79,8.0,24.52794,45.7835,58.40867,18.4407,7.981,2039127446299.3,5.69479,-24112069637.705296,342408945451.364,797.349232,33.85,73.576979,2014,India


------------------Loading data for: China-----------------------


Unnamed: 0,Total Population,Female Population,Male Population,Birth Rate,Death Rate,Compulsory Education Dur.,Employment in Industry(%),Employment in Agriculture(%),Female Employment in Agriculture(%),Female Employment in Industry(%),Unemployment(%),GDP in USD,National Income per Capita,Net income from Abroad,Agriculture value added(in USD),Electric Power Consumption(kWH per capita),Renewable Energy Consumption (%),Fossil Fuel Consumption (%),Year,Country
0,1402760000.0,685468978.0,717291023.0,10.86,7.08,9.0,27.59801,26.0337,22.54743,24.19599,4.31,13894817549374.2,3.516736,-61028947256.6496,978614814147.521,,13.71,,2018,China
1,1396215000.0,682006802.0,714208198.0,12.64,7.06,9.0,28.10759,26.98,23.44956,25.07476,4.47,12310409370892.8,6.199394,-16130804617.683,918795407202.462,,13.13,,2017,China
2,1387790000.0,677689913.0,710100087.0,13.57,7.04,9.0,28.80041,27.7,24.13284,26.15654,4.56,11233276536737.2,7.591275,-55700290899.2407,905097390133.193,,12.56,,2016,China
3,1379860000.0,673690703.0,706169297.0,11.99,7.07,9.0,29.18079,28.59174,24.97942,26.92164,4.65,11061553079876.4,6.351342,-52782008187.8767,927734103002.8099,,12.18,,2015,China
4,1371860000.0,669700351.0,702159650.0,13.83,7.12,9.0,29.90066,29.49999,25.8256,28.09588,4.63,10475682920594.5,7.981345,13299595834.5549,905464751940.2941,3905.317598,11.94,87.670431,2014,China


------------------Loading data for: Japan-----------------------


Unnamed: 0,Total Population,Female Population,Male Population,Birth Rate,Death Rate,Compulsory Education Dur.,Employment in Industry(%),Employment in Agriculture(%),Female Employment in Agriculture(%),Female Employment in Industry(%),Unemployment(%),GDP in USD,National Income per Capita,Net income from Abroad,Agriculture value added(in USD),Electric Power Consumption(kWH per capita),Renewable Energy Consumption (%),Fossil Fuel Consumption (%),Year,Country
0,126811000.0,65140618.0,61670382.0,7.4,11.0,9.0,24.43857,3.396733,2.936995,13.98164,2.47,5037835383110.97,-0.660257,192796477397.602,52615764504.0709,,7.22,,2018,Japan
1,126972000.0,65206949.0,61765051.0,7.6,10.8,9.0,24.75711,3.325158,2.849124,14.10193,2.82,4930837369151.42,1.817321,182413336169.474,55641568300.4069,,6.92,,2017,Japan
2,127076000.0,65245704.0,61830297.0,7.8,10.5,9.0,24.77501,3.383711,2.900578,14.12386,3.13,5003677627544.24,0.905994,174116141695.326,56290438046.6344,,6.39,,2016,Japan
3,127141000.0,65267090.0,61873910.0,8.0,10.3,9.0,25.08391,3.497725,3.063053,14.33906,3.39,4444930651964.18,4.050869,174840516749.234,45965919991.16689,,6.16,93.026455,2015,Japan
4,127276000.0,65324683.0,61951317.0,8.0,10.1,9.0,25.306,3.524867,3.146744,14.33803,3.59,4896994405353.29,0.429799,182026899407.216,48803725389.1671,7819.714636,5.58,94.407469,2014,Japan


------------------Loading data for: Canada-----------------------


Unnamed: 0,Total Population,Female Population,Male Population,Birth Rate,Death Rate,Compulsory Education Dur.,Employment in Industry(%),Employment in Agriculture(%),Female Employment in Agriculture(%),Female Employment in Industry(%),Unemployment(%),GDP in USD,National Income per Capita,Net income from Abroad,Agriculture value added(in USD),Electric Power Consumption(kWH per capita),Renewable Energy Consumption (%),Fossil Fuel Consumption (%),Year,Country
0,37065084.0,18659541.0,18405543.0,10.1,7.7,10.0,19.57131,1.486745,0.936367,8.463358,5.83,1725329192783.02,0.107327,-29334899428.9271,29328725610.2532,,21.99,,2018,Canada
1,36545236.0,18406915.0,18138320.0,10.3,7.5,10.0,19.53366,1.518612,0.961973,8.486851,6.34,1649265644244.09,3.947807,-20974072080.866,31171032151.0368,,22.47,,2017,Canada
2,36109487.0,18192222.0,17917265.0,10.6,7.4,10.0,19.47841,1.944528,1.129702,8.651059,7.0,1527994741907.43,-0.211138,-18668313909.4663,28454713729.5458,,22.05,,2016,Canada
3,35702908.0,17988513.0,17714395.0,10.7,7.4,10.0,19.92307,1.643415,1.015167,8.557106,6.91,1556508816217.14,-3.530433,-23844486211.652,29104161348.0672,,22.69,74.089062,2015,Canada
4,35437435.0,17855282.0,17582153.0,10.9,7.3,10.0,20.17706,1.714031,1.056316,8.615779,6.91,1805749878439.94,1.423773,-29178622922.1416,28649089979.3945,15588.487146,22.49,73.192874,2014,Canada


------------------Loading data for: Great Britain-----------------------


Unnamed: 0,Total Population,Female Population,Male Population,Birth Rate,Death Rate,Compulsory Education Dur.,Employment in Industry(%),Employment in Agriculture(%),Female Employment in Agriculture(%),Female Employment in Industry(%),Unemployment(%),GDP in USD,National Income per Capita,Net income from Abroad,Agriculture value added(in USD),Electric Power Consumption(kWH per capita),Renewable Energy Consumption (%),Fossil Fuel Consumption (%),Year,Country
0,66460344.0,33662065.0,32798279.0,11.0,9.2,11.0,18.12584,1.070025,0.618907,7.469033,4.0,2878152147315.82,0.927874,-39229883074.5982,16317101948.0858,,10.99,,2018,Great Britain
1,66058859.0,33477363.0,32581496.0,11.4,9.2,11.0,18.26126,1.163051,0.650689,7.691214,4.33,2683399006715.79,2.018576,-28893082281.1085,15896986741.0357,,9.7,,2017,Great Britain
2,65611593.0,33270675.0,32340918.0,11.8,9.1,11.0,18.47305,1.125685,0.627984,7.626142,4.81,2699659680997.2,0.990839,-63118855915.2977,15477270586.0584,,8.6,,2016,Great Britain
3,65116219.0,33041408.0,32074811.0,11.9,9.2,11.0,18.66107,1.138458,0.647011,7.69417,5.3,2934857946213.47,2.432155,-63810692067.09129,18473888200.6193,,8.61,80.351771,2015,Great Britain
4,64602298.0,32802913.0,31799385.0,12.0,8.8,11.0,18.96643,1.25617,0.713861,7.959467,6.11,3065223279583.79,3.14538,-54795748844.8578,22840749218.2058,5130.390253,7.36,82.716578,2014,Great Britain


------------------Loading data for: South Africa-----------------------


Unnamed: 0,Total Population,Female Population,Male Population,Birth Rate,Death Rate,Compulsory Education Dur.,Employment in Industry(%),Employment in Agriculture(%),Female Employment in Agriculture(%),Female Employment in Industry(%),Unemployment(%),GDP in USD,National Income per Capita,Net income from Abroad,Agriculture value added(in USD),Electric Power Consumption(kWH per capita),Renewable Energy Consumption (%),Fossil Fuel Consumption (%),Year,Country
0,57339635.0,29517286.0,27822349.0,21.137,8.765,9.0,20.58729,15.69041,16.57739,10.02311,24.22,404159690890.846,-0.146707,-11288811310.3469,9158617157.11408,,10.19,,2018,South Africa
1,56641209.0,29175940.0,27465269.0,20.643,8.802,9.0,20.53531,16.79855,17.66136,10.48616,23.99,381448814653.456,2.347842,-10575832463.2499,9512565346.393353,,10.45,,2017,South Africa
2,56422274.0,29004108.0,27418165.0,20.25,8.924,9.0,20.73353,16.05315,16.33636,10.41636,24.02,323585509674.481,-1.373576,-8291430980.09355,7811956311.86773,,10.53,,2016,South Africa
3,55876504.0,28721087.0,27155417.0,21.3,9.259,9.0,21.34431,15.46671,16.11247,10.32776,22.87,346709790458.563,1.273791,-7943171747.43347,7745421796.45039,,10.28,,2015,South Africa
4,54729551.0,28228865.0,26500685.0,22.057,9.432,9.0,21.14231,14.02137,14.73459,10.71216,22.61,381198869776.106,0.083527,-9453635055.88138,8103329441.92394,4183.82749,9.85,86.791432,2014,South Africa


Data Loading Completed
