In [1]:
pip install wbgapi

Collecting wbgapi
  Obtaining dependency information for wbgapi from https://files.pythonhosted.org/packages/00/12/224030af4886e119a3d03b709f7130e9601a4d15332e1d6a35671b25a4de/wbgapi-1.0.12-py3-none-any.whl.metadata
  Downloading wbgapi-1.0.12-py3-none-any.whl.metadata (13 kB)
Downloading wbgapi-1.0.12-py3-none-any.whl (36 kB)
Installing collected packages: wbgapi
Successfully installed wbgapi-1.0.12
Note: you may need to restart the kernel to use updated packages.


In [1]:
import wbgapi as wb
import pandas as pd

# Advanced chart example: GDP, Health and Environment

Let's use the World Bank's `wbgapi` package to get data on:

- GDP per capita (PPP)
- CO2 intensity (CO2 emissions per capita)
- Population

## GDP per capita
First, we start with GDP per capita (PPP, constant 2021 international dollars).

In [2]:
# Download the GDP per capita (PPP) series, indicator 'NY.GDP.PCAP.PP.KD', from the World Bank API.
# The result is kept in its own variable so later cells can work on a copy without re-downloading.
# labels=True: presumably what adds the 'Country' name column that the reshaping cell relies on — confirm against wbgapi docs.
full_gdp_pc_df = wb.data.DataFrame('NY.GDP.PCAP.PP.KD', labels=True)

In [3]:
# Tidy the raw GDP per capita download into one row per (country, year).
# We start from a copy so the downloaded frame stays untouched while we experiment.
gdp_pc_df = (
    full_gdp_pc_df
    .copy()
    # The country code lives in the index ('economy'); expose it as a regular column
    .reset_index()
    # Wide -> long: one column per year becomes one row per year
    .melt(id_vars=['Country', 'economy'], var_name='date', value_name='gdp_pc')
    # Friendlier column names ('date' and 'gdp_pc' are already set by melt)
    .rename(columns={'Country': 'country', 'economy': 'iso3'})
    # Year labels look like 'YR1990': strip the prefix, then parse the year as a date
    .assign(date=lambda long_df: pd.to_datetime(long_df['date'].str.replace('YR', ''), format='%Y'))
    # Discard rows with missing values
    .dropna()
)

gdp_pc_df

Unnamed: 0,country,iso3,date,gdp_pc
7980,Zimbabwe,ZWE,1990-01-01,4189.552481
7981,Zambia,ZMB,1990-01-01,2456.588763
7985,Viet Nam,VNM,1990-01-01,2416.030525
7987,Vanuatu,VUT,1990-01-01,3044.869413
7988,Uzbekistan,UZB,1990-01-01,3807.604591
...,...,...,...,...
17019,Central Europe and the Baltics,CEB,2023-01-01,42391.250132
17020,Caribbean small states,CSS,2023-01-01,28099.087171
17021,Arab World,ARB,2023-01-01,16288.054296
17022,Africa Western and Central,AFW,2023-01-01,4857.987533


## CO2 Intensity

In [4]:
# Download the CO2 series, indicator 'EN.GHG.CO2.PC.CE.AR5', from the World Bank API.
# labels=False: no country-name column is requested; the reshaping cell only uses the 'economy' code.
# NOTE(review): this indicator code looks like CO2 emissions *per capita* rather than an intensity measure — confirm.
full_co2_df = wb.data.DataFrame('EN.GHG.CO2.PC.CE.AR5', labels=False)

In [5]:
# Tidy the raw CO2 download into one row per (economy, year).
# We start from a copy so the downloaded frame stays untouched while we experiment.
co2_df = (
    full_co2_df
    .copy()
    # The country code lives in the index ('economy'); expose it as a regular column
    .reset_index()
    # Wide -> long: one column per year becomes one row per year
    .melt(id_vars=['economy'], var_name='date', value_name='co2')
    # Friendlier name for the country code ('date' and 'co2' are already set by melt)
    .rename(columns={'economy': 'iso3'})
    # Year labels look like 'YR1990': strip the prefix, then parse the year as a date
    .assign(date=lambda long_df: pd.to_datetime(long_df['date'].str.replace('YR', ''), format='%Y'))
    # Discard rows with missing values
    .dropna()
)

co2_df

Unnamed: 0,iso3,date,co2
2660,ABW,1970-01-01,0.426353
2661,AFE,1970-01-01,1.350890
2662,AFG,1970-01-01,0.161267
2663,AFW,1970-01-01,0.349508
2664,AGO,1970-01-01,1.484021
...,...,...,...
16752,WSM,2022-01-01,1.598151
16754,YEM,2022-01-01,0.363930
16755,ZAF,2022-01-01,6.761533
16756,ZMB,2022-01-01,0.463126


## Population

In [6]:
# Download the total population series, indicator 'SP.POP.TOTL', from the World Bank API.
# labels=False: no country-name column is requested; the reshaping cell only uses the 'economy' code.
full_pop_df = wb.data.DataFrame('SP.POP.TOTL', labels=False)

In [7]:
# Tidy the raw population download into one row per (economy, year).
# We start from a copy so the downloaded frame stays untouched while we experiment.
pop_df = (
    full_pop_df
    .copy()
    # The country code lives in the index ('economy'); expose it as a regular column
    .reset_index()
    # Wide -> long: one column per year becomes one row per year
    .melt(id_vars=['economy'], var_name='date', value_name='pop')
    # Friendlier name for the country code ('date' and 'pop' are already set by melt)
    .rename(columns={'economy': 'iso3'})
    # Year labels look like 'YR1990': strip the prefix, then parse the year as a date
    .assign(date=lambda long_df: pd.to_datetime(long_df['date'].str.replace('YR', ''), format='%Y'))
    # Discard rows with missing values
    .dropna()
)

pop_df

Unnamed: 0,iso3,date,pop
0,ABW,1960-01-01,54608.0
1,AFE,1960-01-01,130692579.0
2,AFG,1960-01-01,8622466.0
3,AFW,1960-01-01,97256290.0
4,AGO,1960-01-01,5357195.0
...,...,...,...
17019,XKX,2023-01-01,1756374.0
17020,YEM,2023-01-01,34449825.0
17021,ZAF,2023-01-01,60414495.0
17022,ZMB,2023-01-01,20569737.0


## Adding Region Data

In [15]:
# Country -> region lookup, read from a public ISO-3166 CSV hosted on GitHub.
# Only the three-letter code and the region are kept; the code column is renamed to 'iso3'
# so it lines up with the key used by the other frames.
region_df = (
    pd.read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/refs/heads/master/all/all.csv")
    .loc[:, ['alpha-3', 'region']]
    .rename(columns={'alpha-3': 'iso3'})
)

## Merging our data

In [18]:
# Combine the three indicators on (iso3, date), then attach the region by iso3.
# Every join is an inner join, so only rows present in all of the inputs survive.
df = (
    gdp_pc_df
    .merge(co2_df, on=['iso3', 'date'], how='inner')
    .merge(pop_df, on=['iso3', 'date'], how='inner')
    .merge(region_df, on='iso3', how='inner')
)

# Persist the merged table (without the row index) for later use
df.to_csv('gdp_co2_pop_region.csv', index=False)

In [17]:
# Display the merged frame as the cell's output
df

Unnamed: 0,country,iso3,date,gdp_pc,co2,pop,region
0,Zimbabwe,ZWE,1990-01-01,4189.552481,1.719130,10113893.0,Africa
1,Zimbabwe,ZWE,1991-01-01,4308.869474,1.843307,10377815.0,Africa
2,Zimbabwe,ZWE,1992-01-01,3823.256637,1.802359,10641501.0,Africa
3,Zimbabwe,ZWE,1993-01-01,3808.549288,1.637085,10794918.0,Africa
4,Zimbabwe,ZWE,1994-01-01,4135.880065,1.524000,10858594.0,Africa
...,...,...,...,...,...,...,...
6161,Djibouti,DJI,2018-01-01,5643.898574,0.664209,1057198.0,Africa
6162,Djibouti,DJI,2019-01-01,5863.701852,0.630450,1073994.0,Africa
6163,Djibouti,DJI,2020-01-01,5852.813398,0.625323,1090156.0,Africa
6164,Djibouti,DJI,2021-01-01,6032.277956,0.674411,1105557.0,Africa


In [9]:
# Inspect the column labels of the tidy GDP per capita frame
gdp_pc_df.columns

Index(['country', 'iso3', 'date', 'gdp_pc'], dtype='object')