In [None]:
!pip install wbgapi

Collecting wbgapi
  Downloading wbgapi-1.0.12-py3-none-any.whl (36 kB)
Installing collected packages: wbgapi
Successfully installed wbgapi-1.0.12


In [None]:
import pandas as pd
import numpy as np

import wbgapi as wb
import requests

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score

In [None]:
# For searching indicators: keyword lookup in the World Bank series catalogue.
# The displayed `id` values are the series codes used further down when requesting data.
wb.series.info(q='GDP')


id,value
EG.GDP.PUSE.KO.PP,GDP per unit of energy use (PPP $ per kg of oil equivalent)
EG.GDP.PUSE.KO.PP.KD,GDP per unit of energy use (constant 2017 PPP $ per kg of oil equivalent)
EG.USE.COMM.GD.PP.KD,"Energy use (kg of oil equivalent) per $1,000 GDP (constant 2017 PPP)"
NY.GDP.DEFL.KD.ZG,"Inflation, GDP deflator (annual %)"
NY.GDP.DEFL.KD.ZG.AD,"Inflation, GDP deflator: linked series (annual %)"
NY.GDP.DEFL.ZS,GDP deflator (base year varies by country)
NY.GDP.DEFL.ZS.AD,GDP deflator: linked series (base year varies by country)
NY.GDP.DISC.CN,Discrepancy in expenditure estimate of GDP (current LCU)
NY.GDP.DISC.KN,Discrepancy in expenditure estimate of GDP (constant LCU)
NY.GDP.MKTP.CD,GDP (current US$)


In [None]:
# Keyword lookup for series matching 'Foreign' (e.g. foreign direct investment indicators).
wb.series.info(q='Foreign')

id,value
BM.KLT.DINV.CD.WD,"Foreign direct investment, net outflows (BoP, current US$)"
BM.KLT.DINV.WD.GD.ZS,"Foreign direct investment, net outflows (% of GDP)"
BN.KLT.DINV.CD,"Foreign direct investment, net (BoP, current US$)"
BX.KLT.DINV.CD.WD,"Foreign direct investment, net inflows (BoP, current US$)"
BX.KLT.DINV.WD.GD.ZS,"Foreign direct investment, net inflows (% of GDP)"
FM.AST.NFRG.CN,Net foreign assets (current LCU)
,6 elements


In [None]:
# Keyword lookup for series matching 'Government' (e.g. central government debt).
wb.series.info(q='Government')

id,value
FM.AST.CGOV.ZG.M3,Claims on central government (annual growth as % of broad money)
FS.AST.CGOV.GD.ZS,"Claims on central government, etc. (% GDP)"
GC.DOD.TOTL.CN,"Central government debt, total (current LCU)"
GC.DOD.TOTL.GD.ZS,"Central government debt, total (% of GDP)"
GE.EST,Government Effectiveness: Estimate
GE.NO.SRC,Government Effectiveness: Number of Sources
GE.PER.RNK,Government Effectiveness: Percentile Rank
GE.PER.RNK.LOWER,"Government Effectiveness: Percentile Rank, Lower Bound of 90% Confidence Interval"
GE.PER.RNK.UPPER,"Government Effectiveness: Percentile Rank, Upper Bound of 90% Confidence Interval"
GE.STD.ERR,Government Effectiveness: Standard Error


In [None]:
# Indicator catalogue: every World Bank series code is written next to the short
# label that will become its column name, then split into the two parallel lists
# ('code' / 'label') that make up the `Variables` dataframe.
_indicator_pairs = [
    ('NY.GDP.MKTP.KD.ZG', 'GDP Growth%'),
    ('NY.GDP.MKTP.KD', 'GDP'),
    ('NY.GDP.PCAP.KD', 'GDP Per Capita'),
    ('NE.CON.GOVT.ZS', 'GDP Government pp'),
    ('NE.CON.PRVT.ZS', 'GDP Private pp'),
    ('GC.NFN.TOTL.GD.ZS', 'Investment'),
    ('NY.GDP.DEFL.KD.ZG', 'Inflation'),
    ('EG.FEC.RNEW.ZS', 'Renewable Energy'),
    ('SL.UEM.TOTL.NE.ZS', 'Unemployment'),
    ('SL.EMP.TOTL.SP.NE.ZS', 'Employment'),
    ('HD.HCI.OVRL', 'Human Capital Index'),
    ('SL.AGR.EMPL.ZS', 'Agriculture Employment'),
    ('SL.EMP.VULN.ZS', 'Vulnerable Employment'),
    # Environment:
    ('EN.POP.SLUM.UR.ZS', 'Population in Slums'),
    ('ER.H2O.INTR.PC', 'Freshwater Resources'),
    ('NE.IMP.GNFS.ZS', 'Imports'),
    ('NE.EXP.GNFS.ZS', 'Exports'),
    ('BX.KLT.DINV.CD.WD', 'FDI'),
    ('GC.DOD.TOTL.GD.ZS', 'Government Debt'),
    ('FR.INR.RINR', 'Real Interest Rate'),
    ('SI.POV.GINI', 'Gini Index'),
    ('BN.CAB.XOKA.GD.ZS', 'Current Account Balance'),
    ('SP.POP.TOTL', 'Population'),
    ('EN.POP.DNST', 'Population Density'),
    ('SP.POP.GROW', 'Population Growth'),
    ('SP.DYN.LE00.IN', 'Life Expectancy'),
    ('SP.DYN.CDRT.IN', 'Death Rate'),
    ('SH.DYN.MORT', 'Child Mortality'),
    # Agriculture:
    ('EG.ELC.RNEW.ZS', 'Renewable Energy Output'),
    ('AG.LND.AGRI.ZS', 'Agricultural Land'),
    # Climate change:
    ('EG.ELC.ACCS.ZS', 'Electricity Access'),
    ('EG.USE.ELEC.KH.PC', 'Power Consumption'),
    # Energy & Mining:
    ('TX.VAL.FUEL.ZS.UN', 'Fuel Exports'),
    ('TX.VAL.MMTL.ZS.UN', 'Metal Exports'),
    ('EN.ATM.CO2E.PC', 'CO2 Emissions'),
    ('EN.CLC.GHGR.MT.CE', 'ghg Emissions'),
    ('NV.IND.TOTL.ZS', 'Industry GDP'),
    ('NV.SRV.EMPL.KD', 'Value Added Services'),
    ('SE.ENR.PRSC.FM.ZS', 'School Enrollment'),
    ('SE.ADT.LITR.ZS', 'Literacy'),
    ('SE.TER.CUAT.BA.ZS', 'Bachelor'),
    ('VC.IHR.PSRC.P5', 'Homicide'),
    ('SP.POP.SCIE.RD.P6', 'Research'),
]

vars_dict = {
    'code': [pair[0] for pair in _indicator_pairs],
    'label': [pair[1] for pair in _indicator_pairs],
}

Variables = pd.DataFrame(vars_dict)

In [None]:
# Number of indicators (rows) in the catalogue.
len(Variables)

43

In [None]:
# Variable code, label and definition: for every series code (first column) ask the
# World Bank API for the series record and keep its 'value' field as the new
# "definition" column.
Variables = Variables.assign(
    definition=[
        wb.series.get(id=series_code)['value']
        for series_code in Variables.iloc[:, 0]
    ]
)

In [None]:
# Display the indicator catalogue (code, label, definition).
Variables

Unnamed: 0,code,label,definition
0,NY.GDP.MKTP.KD.ZG,GDP Growth%,GDP growth (annual %)
1,NY.GDP.MKTP.KD,GDP,GDP (constant 2015 US$)
2,NY.GDP.PCAP.KD,GDP Per Capita,GDP per capita (constant 2015 US$)
3,NE.CON.GOVT.ZS,GDP Government pp,General government final consumption expenditu...
4,NE.CON.PRVT.ZS,GDP Private pp,Households and NPISHs final consumption expend...
5,GC.NFN.TOTL.GD.ZS,Investment,Net investment in nonfinancial assets (% of GDP)
6,NY.GDP.DEFL.KD.ZG,Inflation,"Inflation, GDP deflator (annual %)"
7,EG.FEC.RNEW.ZS,Renewable Energy,Renewable energy consumption (% of total final...
8,SL.UEM.TOTL.NE.ZS,Unemployment,"Unemployment, total (% of total labor force) (..."
9,SL.EMP.TOTL.SP.NE.ZS,Employment,"Employment to population ratio, 15+, total (%)..."


In [None]:
# Function for obtaining a clean dataframe with columns "economy", "Year", and one economic indicator:
def vert_df(gdp_df, name, start_year=1992):
    """Reshape a wide, one-column-per-year indicator table into long form.

    Parameters
    ----------
    gdp_df : pandas.DataFrame
        Wide table whose index is named ``'economy'`` and whose year columns are
        named ``'YR<yyyy>'`` (or plain ``'<yyyy>'``). A ``'Country'`` label column
        is dropped if present. The frame is NOT modified.
    name : str
        Column name given to the indicator values in the result.
    start_year : int, default 1992
        First year to keep; every year column earlier than this is discarded.

    Returns
    -------
    pandas.DataFrame
        Columns ``['economy', 'Year', name]`` with ``Year`` numeric, one row per
        (economy, year) pair.
    """
    # Rename on a copy so the caller's frame keeps its original 'YR…' headers
    # (assigning to .columns would silently alter the argument).
    wide = gdp_df.rename(columns=lambda col: str(col).replace('YR', '')).reset_index()

    # The country name is redundant with the economy code; tolerate frames that
    # were fetched without labels and therefore have no 'Country' column.
    wide = wide.drop(columns='Country', errors='ignore')

    # Drop only the early-year columns that actually exist. A fixed 1960..1991
    # list raises KeyError as soon as one of those years is missing.
    early_years = [
        col for col in wide.columns
        if col != 'economy' and str(col).isdigit() and int(col) < start_year
    ]
    wide = wide.drop(columns=early_years)

    long_df = wide.melt(id_vars=['economy'], var_name='Year', value_name=name)
    long_df['Year'] = pd.to_numeric(long_df['Year'])
    return long_df

In [None]:
# Build the dataframe to export: download each indicator, reshape it to long form
# with vert_df, and left-join it onto the table started by the first indicator.
indicator_pairs = list(zip(Variables.iloc[:, 0], Variables.iloc[:, 1]))

first_code, first_label = indicator_pairs[0]
gdp_df = vert_df(wb.data.DataFrame(first_code, labels=True), first_label)

for wbcode, wblabel in indicator_pairs[1:]:
    new_data = vert_df(wb.data.DataFrame(wbcode, labels=True), wblabel)
    gdp_df = gdp_df.merge(new_data, how='left', on=['economy', 'Year'])

In [None]:
# Give the economy identifier column its final header, "Country Code".
gdp_df = gdp_df.rename(columns={'economy': 'Country Code'})

In [None]:
# Preview the merged table: one row per (Country Code, Year), one column per indicator label.
gdp_df

Unnamed: 0,Country Code,Year,GDP Growth%,GDP,GDP Per Capita,GDP Government pp,GDP Private pp,Investment,Inflation,Renewable Energy,...,Metal Exports,CO2 Emissions,ghg Emissions,Industry GDP,Value Added Services,School Enrollment,Literacy,Bachelor,Homicide,Research
0,ZWE,1992,-9.015570,1.600947e+10,1504.437140,24.158070,64.863924,,-14.129658,64.37,...,15.104396,1.735620,,37.212145,4104.536704,0.94515,83.512581,,,
1,ZMB,1992,-1.730922,6.720005e+09,832.267134,,,,165.533954,82.73,...,,0.352722,,43.734804,4145.485858,,,,9.511616,
2,YEM,1992,8.207598,2.750967e+10,1905.924245,17.471673,82.081394,,13.171745,1.31,...,,0.632544,,31.388646,4770.029730,,,,,
3,PSE,1992,,,,,,,,21.53,...,,,,,,,,,,
4,VIR,1992,,,,,,,,0.47,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8241,CEB,2022,4.127586,1.630930e+12,16291.667724,18.807348,56.356736,,12.768248,,...,2.943075,,,27.420528,,,99.427521,,,
8242,CSS,2022,13.217205,8.180554e+10,10899.444048,,,,3.993355,,...,3.126871,,,34.926726,,,,,,
8243,ARB,2022,6.033833,2.909301e+12,6260.802664,15.704651,45.574356,,13.241537,,...,,,,48.099052,,,75.171532,,,
8244,AFW,2022,3.766354,8.938137e+11,1822.878716,,,,8.046962,,...,,,,30.059033,,,60.312698,,,


In [None]:
# Write the assembled indicator table to CSV in the working directory;
# index=False leaves the row index out of the file.
gdp_df.to_csv('WorldBankIndicators.csv', index=False)