In [3]:
import wbgapi as wb
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
import altair as alt

# Data

In [14]:
# Relative path of the directory the raw CSV extracts are written to / read from.
raw_data_dir = "../data/raw"
# File name of the cached Africa GDP extract inside raw_data_dir.
africa_gdp = "africa_gdp.csv"

Method 1

Downloading data for all economies, then filtering for Africa

In [7]:
# Fetch the World Bank economy metadata table (countries and aggregates alike).
# This cell only downloads the table; the filtering for Africa happens in a later cell.
all_countries = wb.economy.DataFrame().reset_index()

In [8]:
# Inspect the economy metadata (pandas truncates the middle rows in the display).
all_countries

Unnamed: 0,id,name,aggregate,longitude,latitude,region,adminregion,lendingType,incomeLevel,capitalCity
0,ABW,Aruba,False,-70.0167,12.51670,LCN,,LNX,HIC,Oranjestad
1,AFE,Africa Eastern and Southern,True,,,,,,,
2,AFG,Afghanistan,False,69.1761,34.52280,MEA,MNA,IDX,LIC,Kabul
3,AFW,Africa Western and Central,True,,,,,,,
4,AGO,Angola,False,13.2420,-8.81155,SSF,SSA,IBD,LMC,Luanda
...,...,...,...,...,...,...,...,...,...,...
261,XKX,Kosovo,False,20.9260,42.56500,ECS,ECA,IDX,UMC,Pristina
262,YEM,"Yemen, Rep.",False,44.2075,15.35200,MEA,MNA,IDX,LIC,Sana'a
263,ZAF,South Africa,False,28.1871,-25.74600,SSF,SSA,IBD,UMC,Pretoria
264,ZMB,Zambia,False,28.2937,-15.39820,SSF,SSA,IDX,LMC,Lusaka


In [9]:
# World Bank `region` codes that contain African economies.
african_regions = ['SSF', 'MEA']  # Sub-Saharan Africa and MENA

# MEA also covers the Middle East (the table above lists Afghanistan and Yemen
# under it), so only its African members may be kept.
north_africa_codes = ['DZA', 'DJI', 'EGY', 'LBY', 'MAR', 'TUN']

# Get list of African country codes.
# 'SSA' was dropped from the filter: it is an `adminregion` code and never
# appears in the `region` column, so it matched nothing.
in_african_region = all_countries['region'].isin(african_regions)
outside_africa = (
    (all_countries['region'] == 'MEA')
    & ~all_countries['id'].isin(north_africa_codes)
)
africa_countries = all_countries.loc[
    in_african_region & ~outside_africa, 'id'
].tolist()

Method 2: Explicit listing (more reliable, because region codes such as MEA also contain non-African economies)

In [12]:
# ISO 3166-1 alpha-3 codes of the African economies to download (54 entries).
africa_codes = [
    'DZA', 'AGO', 'BEN', 'BWA', 'BFA', 'BDI', 'CMR', 'CPV', 'CAF', 
    'TCD', 'COM', 'COG', 'COD', 'CIV', 'DJI', 'EGY', 'GNQ', 'ERI',
    'SWZ', 'ETH', 'GAB', 'GMB', 'GHA', 'GIN', 'GNB', 'KEN', 'LSO',
    'LBR', 'LBY', 'MDG', 'MWI', 'MLI', 'MRT', 'MUS', 'MAR', 'MOZ',
    'NAM', 'NER', 'NGA', 'RWA', 'STP', 'SEN', 'SYC', 'SLE', 'SOM',
    'ZAF', 'SSD', 'SDN', 'TZA', 'TGO', 'TUN', 'UGA', 'ZMB', 'ZWE'
]

# Download GDP (current US$) for every code in africa_codes.
# NOTE(review): this duplicates the fetch inside read_gdp_data defined further down.
print("Fetching GDP data for African countries...")
gdp_data = wb.data.DataFrame(
    'NY.GDP.MKTP.CD',  # GDP (current US$)
    africa_codes,
    time=range(1957, 2025),  # range() excludes its stop value, so 2024 is the last year requested
    labels=True,  # adds a 'Country' name column; presumably the codes stay in the index - confirm in wbgapi docs
    skipBlanks=True,  # presumably omits empty observations - TODO confirm
    numericTimeKeys=True  # presumably yields plain year keys (1960) instead of 'YR1960' - TODO confirm
)
print("All done ✅")

Fetching GDP data for African countries...
All done ✅


In [16]:
# Persist the downloaded GDP table so later runs can read it from disk.
path = os.path.join(raw_data_dir, africa_gdp)
# A fresh checkout may not have the raw-data directory yet.
os.makedirs(raw_data_dir, exist_ok=True)
# Let pandas open the file itself: it writes UTF-8 (what pd.read_csv expects by
# default) and controls the line endings, whereas a text-mode open() uses the
# locale encoding and may translate newlines.
# index=False: the row index is not stored, only the data columns.
gdp_data.to_csv(path, index=False)

In [17]:
def read_gdp_data(country_codes, start_year, end_year, data_dir, name):
    """Return GDP (current US$) data, downloading it only if no cached CSV exists.

    Parameters
    ----------
    country_codes : list of str
        Economy codes to request from the World Bank API.
    start_year : int
        First year requested (inclusive).
    end_year : int
        End of the requested period (exclusive, it is used as the ``range`` stop).
    data_dir : str
        Directory holding the cached CSV; created when missing.
    name : str
        File name of the cached CSV inside ``data_dir``.

    Returns
    -------
    pandas.DataFrame
        The table as read back from the CSV cache, so the result has the same
        layout whether the data was just downloaded or already on disk.
    """
    os.makedirs(data_dir, exist_ok=True)
    file_path = os.path.join(data_dir, name)

    if not os.path.isfile(file_path):
        print("Fetching GDP data...")
        # Use the caller's arguments; the previous version ignored them and
        # always fetched the global africa_codes for 1957-2024.
        fetched = wb.data.DataFrame(
            'NY.GDP.MKTP.CD',  # GDP (current US$)
            country_codes,
            time=range(start_year, end_year),
            labels=True,
            skipBlanks=True,
            numericTimeKeys=True,
        )
        # pandas writes UTF-8, matching the pd.read_csv default used below;
        # a text-mode open() would use the platform's locale encoding instead.
        fetched.to_csv(file_path, index=False)
    else:
        print("Reading data from disk...")

    # Always return what is on disk so a first run and a cached run agree
    # (same index, same column labels).
    df = pd.read_csv(file_path)
    print("All done ✅")
    return df



In [19]:
# Arguments for the read_gdp_data call in the next cell.
# NOTE(review): that call passes africa_codes directly, so the country_codes
# alias is unused in the visible cells.
country_codes = africa_codes
# start / end are passed as start_year / end_year.
start = 1957
end = 2025

In [20]:
# Load the Africa GDP table (from the CSV cache when it exists).
df = read_gdp_data(
    country_codes=africa_codes,
    start_year=start,
    end_year=end,
    data_dir=raw_data_dir,
    name=africa_gdp,
)

Reading data from disk...
All done ✅


In [21]:
# Preview the first rows of the GDP table (one row per country, one column per year).
df.head()

Unnamed: 0,Country,1960,1961,1962,1963,1964,1965,1966,1967,1968,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,Zimbabwe,1053528000.0,1097207000.0,1118172000.0,1160104000.0,1217759000.0,1312105000.0,1282404000.0,1397715000.0,1480355000.0,...,19973250000.0,20559250000.0,51035660000.0,34141670000.0,33357700000.0,31980330000.0,41287670000.0,40757560000.0,35871780000.0,41539410000.0
1,Zambia,698739700.0,682359700.0,679279700.0,704339700.0,822639700.0,1061200000.0,1239000000.0,1340639000.0,1573739000.0,...,21251220000.0,20958410000.0,25873600000.0,26311510000.0,23308670000.0,18137760000.0,22096420000.0,29163780000.0,27577960000.0,25303190000.0
2,Uganda,423145600.0,441667300.0,449158200.0,516315200.0,589247700.0,884502300.0,925381500.0,967240700.0,1037379000.0,...,32387180000.0,29203990000.0,30744470000.0,32927030000.0,35353060000.0,37600370000.0,40529790000.0,45565330000.0,48768960000.0,53911910000.0
3,Tunisia,,866155400.0,880027700.0,1026738000.0,1025867000.0,991047600.0,1040952000.0,1085714000.0,1214667000.0,...,45779490000.0,44360070000.0,42163530000.0,42686500000.0,41905640000.0,42491780000.0,47073230000.0,44929920000.0,48205330000.0,51332290000.0
4,Togo,171057100.0,178497100.0,186745800.0,202305900.0,234572200.0,264505500.0,305227600.0,327215800.0,341691600.0,...,5755458000.0,6071168000.0,6387424000.0,7029216000.0,6992654000.0,7486032000.0,8541668000.0,8646453000.0,9816236000.0,10651180000.0


# Read West Africa Data

In [25]:
# Arguments for read_west_africa_data.
# data_dir holds the same path string as raw_data_dir defined near the top.
data_dir = '../data/raw'
name = 'west_africa.csv'
# NOTE(review): start/end rebind the names used earlier for the GDP download.
# NOTE(review): the recorded output further down spans 2010-2023, which does not
# match these values - the cells appear to have been run with other settings; confirm.
start = 1970
end = 2025

In [None]:
def read_west_africa_data(core_indicators, start, end, data_dir, name, countries=None):
    """Return West Africa indicators in long format, using a CSV cache.

    Parameters
    ----------
    core_indicators : dict
        Maps World Bank series codes to labels; only the keys are requested.
    start : int
        First year requested (inclusive).
    end : int
        End of the requested period (exclusive, it is used as the ``range`` stop).
    data_dir : str
        Directory holding the cached CSV; created when missing.
    name : str
        File name of the cached CSV inside ``data_dir``.
    countries : list of str, optional
        Economy codes to request. When omitted, the notebook-level
        ``west_africa`` list is used, which keeps existing calls working.

    Returns
    -------
    pandas.DataFrame
        Columns ``country_code``, ``country``, ``indicator``, ``year``, ``value``.
    """
    os.makedirs(data_dir, exist_ok=True)
    file_path = os.path.join(data_dir, name)

    if os.path.isfile(file_path):
        print("Reading data from disk...")
        df_long = pd.read_csv(file_path)
        print("All done ✅")
        return df_long

    if countries is None:
        # Backward-compatible fallback; pass `countries` explicitly to avoid
        # depending on a global that is defined in a later cell.
        countries = west_africa

    print("Fetching West Africa data...")
    wide = wb.data.DataFrame(
        list(core_indicators.keys()),
        countries,
        time=range(start, end),
        labels=True,
    ).reset_index()

    # Year columns are named 'YR<year>'; str() keeps non-string labels from raising.
    year_cols = [col for col in wide.columns if str(col).startswith("YR")]
    df_long = wide.melt(
        id_vars=["economy", "Country", "series"],
        value_vars=year_cols,
        var_name="year",
        value_name="value",
    )
    # regex=False: plain substring removal regardless of the pandas version default.
    df_long["year"] = df_long["year"].str.replace("YR", "", regex=False).astype(int)
    df_long = df_long.rename(
        columns={
            "economy": "country_code",
            "Country": "country",
            "series": "indicator",
        }
    )
    print("All done ✅")
    # pandas writes UTF-8, the encoding pd.read_csv assumes when the cache is
    # read back; a text-mode open() would use the locale encoding instead.
    df_long.to_csv(file_path, index=False)
    return df_long

In [28]:
# Economy codes requested for the West Africa extract.
west_africa = [
    'BEN',  # Benin
    'BFA',  # Burkina Faso
    'CPV',  # Cabo Verde
    'CIV',  # Cote d'Ivoire
    'GMB',  # Gambia
    'GHA',  # Ghana
    'GIN',  # Guinea
    'GNB',  # Guinea-Bissau
    'LBR',  # Liberia
    'MLI',  # Mali
    'MRT',  # Mauritania
    'NER',  # Niger
    'NGA',  # Nigeria
    'SEN',  # Senegal
    'SLE',  # Sierra Leone
    'TGO',  # Togo
]

# Core indicators only: World Bank series code -> short label.
core_indicators = dict([
    ('NY.GDP.PCAP.CD', 'GDP per capita'),
    ('SP.POP.TOTL', 'Population'),
    ('SP.DYN.LE00.IN', 'Life expectancy'),
    ('SE.ADT.LITR.ZS', 'Literacy rate'),
    ('SH.DYN.MORT', 'Child mortality'),
    ('IT.NET.USER.ZS', 'Internet users %'),
    ('SL.UEM.TOTL.ZS', 'Unemployment'),
    ('EG.ELC.ACCS.ZS', 'Electricity access'),
    ('SI.POV.DDAY', 'Poverty rate'),
    ('NY.GDP.MKTP.KD.ZG', 'GDP growth'),
])

In [36]:
# Load the West Africa indicators (long format) and preview the first rows.
df = read_west_africa_data(
    core_indicators=core_indicators,
    start=start,
    end=end,
    data_dir=data_dir,
    name=name,
)
df.head()

Fetching West Africa data...
All done ✅


Unnamed: 0,country_code,country,indicator,year,value
0,TGO,Togo,NY.GDP.PCAP.CD,2010,704.986772
1,SLE,Sierra Leone,NY.GDP.PCAP.CD,2010,685.039673
2,SEN,Senegal,NY.GDP.PCAP.CD,2010,1275.883575
3,NGA,Nigeria,NY.GDP.PCAP.CD,2010,2202.25673
4,NER,Niger,NY.GDP.PCAP.CD,2010,474.425692


In [38]:
# Column dtypes first, then the columns with the most missing values.
print(df.dtypes)
missing_per_column = df.isna().sum()
most_missing_first = missing_per_column.sort_values(ascending=False)
print(most_missing_first.head())

country_code     object
country          object
indicator        object
year              int64
value           float64
dtype: object
value           341
country_code      0
country           0
indicator         0
year              0
dtype: int64


In [42]:
# Quick sanity summary: frame size, number of indicators and countries, year span.
print(df.shape)
print(df["indicator"].nunique())
print(df["country"].nunique())
# Two separate statements: writing `print(...), print(...)` builds a tuple and
# makes the cell echo a spurious `(None, None)` as its result.
print(df["year"].min())
print(df["year"].max())

(2240, 5)
10
16
2010
2023


(None, None)