### Indicators and Country Dictionary

In [1]:
import pandas as pd

# Three-letter country codes for the ten African countries covered by this
# notebook. This list is what the batch run passes as the `countries`
# argument to the World Bank API fetchers.
countries = ['ZAF', 'NGA', 'EGY', 'KEN', 'ETH', 'GHA', 'TZA', 'UGA', 'CIV', 'CMR']

# Maps each World Bank indicator code to its human-readable standard name.
# fetch_all_indicators uses these names as the column label of each fetched
# DataFrame and as the keys of the dict it returns.
indicator_names = {
    'NY.GDP.MKTP.CD': 'GDP (current US$)',
    'NY.GDP.PCAP.CD': 'GDP per capita (current US$)',
    'NY.GDP.MKTP.KD.ZG': 'GDP growth (annual %)',
    'FP.CPI.TOTL.ZG': 'Inflation, consumer prices (annual %)',
    'DT.DOD.DECT.CD': 'External debt stocks, total (DOD, current US$)',
    'SI.POV.NAHC': 'Poverty headcount ratio at national poverty lines (% of population)',
    'SI.POV.DDAY': 'Poverty headcount ratio at $2.15 a day (2017 PPP) (% of population)',
    'SH.DYN.MORT': 'Mortality rate, under-5 (per 1,000 live births)',
    'SH.STA.MMRT': 'Maternal mortality ratio (modeled estimate, per 100,000 live births)',
    'SH.HIV.INCD.ZS': 'Incidence of HIV (% of uninfected population ages 15-49)',
    'SH.IMM.MEAS': 'Immunization, measles (% of children ages 12-23 months)',
    'SE.PRM.ENRR': 'School enrollment, primary (% gross)',
    'SE.SEC.ENRR': 'School enrollment, secondary (% gross)',
    'SE.ADT.LITR.ZS': 'Literacy rate, adult total (% of people ages 15 and above)',
    'SG.GEN.PARL.ZS': 'Proportion of seats held by women in national parliaments (%)',
    'SL.TLF.CACT.FE.ZS': 'Labor force participation rate, female (% of female population ages 15+)',
    'SH.H2O.SMDW.ZS': 'People using safely managed drinking water services (% of population)',
    'SH.STA.SMSS.ZS': 'People using safely managed sanitation services (% of population)',
    'EG.ELC.ACCS.ZS': 'Access to electricity (% of population)',
    'EG.USE.ELEC.KH.PC': 'Electric power consumption (kWh per capita)',
    'SL.EMP.VULN.ZS': 'Vulnerable employment, total (% of total employment)',
    'SL.UEM.TOTL.ZS': 'Unemployment, total (% of total labor force)',
    'IT.NET.USER.ZS': 'Individuals using the Internet (% of population)',
    'IT.CEL.SETS.P2': 'Mobile cellular subscriptions (per 100 people)',
    'EN.ATM.CO2E.PC': 'CO2 emissions (metric tons per capita)',
    'AG.LND.FRST.ZS': 'Forest area (% of land area)',
    'AG.YLD.CREL.KG': 'Cereal yield (kg per hectare)',
    'SN.ITK.DEFC.ZS': 'Prevalence of undernourishment (% of population)',
    'FX.OWN.TOTL.ZS': 'Account ownership at a financial institution or with a mobile-money-service provider (% of population ages 15+)'
}

# Groups the indicator codes by thematic category (category name -> list of
# World Bank indicator codes). Every code listed here has a matching entry
# in `indicator_names`; the batch fetch iterates over all of these lists.
indicators = {
    'Economic Growth': ['NY.GDP.MKTP.CD', 'NY.GDP.PCAP.CD', 'NY.GDP.MKTP.KD.ZG'],
    'Liquidity': ['FP.CPI.TOTL.ZG', 'DT.DOD.DECT.CD'],
    'Poverty and Inequality': ['SI.POV.NAHC', 'SI.POV.DDAY'],
    'Health': ['SH.DYN.MORT', 'SH.STA.MMRT', 'SH.HIV.INCD.ZS', 'SH.IMM.MEAS'],
    'Education': ['SE.PRM.ENRR', 'SE.SEC.ENRR', 'SE.ADT.LITR.ZS'],
    'Gender Equality': ['SG.GEN.PARL.ZS', 'SL.TLF.CACT.FE.ZS'],
    'Water and Sanitation': ['SH.H2O.SMDW.ZS', 'SH.STA.SMSS.ZS'],
    'Energy': ['EG.ELC.ACCS.ZS', 'EG.USE.ELEC.KH.PC'],
    'Employment and Decent Work': ['SL.EMP.VULN.ZS', 'SL.UEM.TOTL.ZS'],
    'Infrastructure and Innovation': ['IT.NET.USER.ZS', 'IT.CEL.SETS.P2'],
    'Climate Action': ['EN.ATM.CO2E.PC', 'AG.LND.FRST.ZS'],
    'Agriculture and Food Security': ['AG.YLD.CREL.KG', 'SN.ITK.DEFC.ZS'],
    'Financial Inclusion': ['FX.OWN.TOTL.ZS']
}



### Define Simple Synchronous Requests

In [None]:
import requests
import pandas as pd
from typing import List, Union
import logging

import asyncio
import aiohttp

# Configure logging: INFO level, each record formatted as
# "<timestamp> - <level> - <message>". `logger` is the module-level logger
# used by the fetch/process functions below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Constants
# Base URL of the World Bank API (v2); the fetchers append
# "/country/<codes>/indicator/<id>" to it.
# NOTE(review): this is plain HTTP — consider switching to https after
# confirming the endpoint serves it.
WORLD_BANK_URL = 'http://api.worldbank.org/v2'

def fetch_world_bank_data(indicator: str, countries: Union[str, List[str]], start_year: int = 1960, end_year: Union[int, None] = None, timeout: float = 30.0) -> pd.DataFrame:
    """Fetch one indicator for the given countries and years from the World Bank API.

    Requests ``{WORLD_BANK_URL}/country/{codes}/indicator/{indicator}`` page by
    page, collects the raw observation records, and passes them to
    ``process_world_bank_data``.

    Args:
        indicator: World Bank indicator code, e.g. ``'NY.GDP.PCAP.CD'``.
        countries: A single country-code string, or a list of codes (joined
            with ``;`` for the request path).
        start_year: First year of the requested date range.
        end_year: Last year of the requested range; defaults to the current year.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The DataFrame built by ``process_world_bank_data`` from every page
        fetched before the loop stopped. A request error is logged and ends
        the loop; records from earlier pages are still returned.
    """
    if end_year is None:
        end_year = pd.Timestamp.now().year

    countries_str = ';'.join(countries) if isinstance(countries, list) else countries

    url = f"{WORLD_BANK_URL}/country/{countries_str}/indicator/{indicator}"
    params = {
        'format': 'json',
        'per_page': 10000,
        'date': f"{start_year}:{end_year}",
    }

    all_data = []
    page = 1

    while True:
        params['page'] = page
        # Keep the try body to the calls that can raise a request error.
        try:
            response = requests.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            logger.error(f"Error fetching data: {str(e)}")
            break

        # A usable payload is a two-element list: [metadata, records].
        # Anything else (error object, empty records) ends the pagination.
        if not isinstance(data, list) or len(data) < 2 or not data[1]:
            break

        meta, rows = data[0], data[1]
        all_data.extend(rows)

        # Prefer the page count reported in the response metadata; fall back
        # to the "short page means last page" heuristic when it is missing
        # or not an integer.
        try:
            total_pages = int(meta['pages'])
        except (KeyError, TypeError, ValueError):
            total_pages = None

        if total_pages is not None:
            is_last_page = page >= total_pages
        else:
            is_last_page = len(rows) < params['per_page']

        if is_last_page:
            break

        page += 1

    return process_world_bank_data(all_data, indicator)

def process_world_bank_data(data: List[dict], indicator: str) -> pd.DataFrame:
    """Turn raw World Bank observation records into an indexed DataFrame.

    Args:
        data: List of record dicts as returned by the API. Each record must
            provide ``country``, ``countryiso3code``, ``date`` and ``value``;
            the metadata fields ``indicator``, ``obs_status``, ``decimal`` and
            ``unit`` are discarded when present.
        indicator: Indicator code; used as the name of the value column.

    Returns:
        A DataFrame indexed by ``(country_name, country_code, year)`` and
        sorted by that index, with the numeric observations in a column named
        after ``indicator``. An empty DataFrame (and a logged warning) when
        ``data`` is empty.
    """
    if not data:
        logger.warning(f"No data retrieved for indicator: {indicator}")
        return pd.DataFrame()

    df = pd.DataFrame(data)

    # `country` arrives as a nested dict; keep only its display name.
    df['country_name'] = df['country'].apply(lambda x: x['value'] if isinstance(x, dict) else x)
    # Non-numeric / missing observations become NaN instead of raising.
    df['value'] = pd.to_numeric(df['value'], errors='coerce')
    df['date'] = pd.to_datetime(df['date'], format='%Y')

    # errors='ignore': a record set that lacks one of these optional metadata
    # fields must not abort processing with a KeyError.
    df = df.drop(columns=['indicator', 'obs_status', 'decimal', 'country', 'unit'], errors='ignore')
    df = df.rename(columns={'countryiso3code': 'country_code', 'date': 'year', 'value': indicator})

    return df.set_index(['country_name', 'country_code', 'year']).sort_index()

def get_world_bank_data(indicator: str, countries: Union[str, List[str]], start_year: int = 1960, end_year: Union[int, None] = None) -> pd.DataFrame:
    """Fetch and process World Bank data for one indicator, never raising.

    Thin safety wrapper around ``fetch_world_bank_data``.

    Args:
        indicator: World Bank indicator code.
        countries: A single country-code string or a list of codes.
        start_year: First year of the requested date range.
        end_year: Last year of the requested range; ``None`` lets the fetcher
            pick its default.

    Returns:
        The fetched DataFrame, or an empty DataFrame if any exception was
        raised while fetching or processing (the exception is logged with
        its traceback).
    """
    try:
        df = fetch_world_bank_data(indicator, countries, start_year, end_year)
    except Exception as e:
        logger.exception(f"Error retrieving data for {indicator}: {str(e)}")
        return pd.DataFrame()

    # The fetcher logs request errors itself and then returns an empty frame,
    # so an empty result must not be reported as a success.
    if not df.empty:
        logger.info(f"Successfully retrieved data for {indicator}")
    return df

In [None]:
### Test Function

In [None]:
# Example: smoke-test the synchronous fetcher on a single indicator.
# In a notebook `__name__ == "__main__"` is true, so everything assigned here
# becomes a notebook-level global. The names are therefore prefixed with
# `example_` so this cell does not overwrite the shared `countries` list
# (or any other global) that the batch fetch relies on.
if __name__ == "__main__":
    example_indicator = "NY.GDP.PCAP.CD"
    example_countries = ["USA", "UKR", "JPN"]
    example_start_year = 1960
    example_end_year = 2020
    example_df = get_world_bank_data(
        example_indicator,
        example_countries,
        example_start_year,
        example_end_year,
    )
    print(example_df.head())

### Define Batch Asynchronous Requests

In [6]:
import asyncio
import aiohttp
import nest_asyncio
import pandas as pd
from typing import List, Union
import logging

# Configure logging: INFO level, each record formatted as
# "<timestamp> - <level> - <message>". `logger` is the module-level logger
# used by the async fetch/process functions below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Constants
# Base URL of the World Bank API (v2); the fetchers append
# "/country/<codes>/indicator/<id>" to it.
# NOTE(review): this is plain HTTP — consider switching to https after
# confirming the endpoint serves it.
WORLD_BANK_URL = 'http://api.worldbank.org/v2'

# Apply nest_asyncio to allow asyncio in Jupyter
# (the notebook later calls asyncio.run() from a cell).
nest_asyncio.apply()

async def fetch_world_bank_data_async(session: aiohttp.ClientSession, indicator: str, countries: Union[str, List[str]], start_year: int = 1960, end_year: Union[int, None] = None) -> pd.DataFrame:
    """Asynchronously fetch one indicator for the given countries and years.

    Requests ``{WORLD_BANK_URL}/country/{codes}/indicator/{indicator}`` page by
    page through ``session``, collects the raw observation records, and passes
    them to ``process_world_bank_data``.

    Args:
        session: Open aiohttp session supplied by the caller; it is used for
            every request and is not closed here.
        indicator: World Bank indicator code, e.g. ``'NY.GDP.PCAP.CD'``.
        countries: A single country-code string, or a list of codes (joined
            with ``;`` for the request path).
        start_year: First year of the requested date range.
        end_year: Last year of the requested range; defaults to the current year.

    Returns:
        The DataFrame built by ``process_world_bank_data`` from every page
        fetched before the loop stopped. A client error or timeout is logged
        and ends the loop; records from earlier pages are still returned.
    """
    if end_year is None:
        end_year = pd.Timestamp.now().year

    countries_str = ';'.join(countries) if isinstance(countries, list) else countries

    url = f"{WORLD_BANK_URL}/country/{countries_str}/indicator/{indicator}"
    params = {
        'format': 'json',
        'per_page': 10000,
        'date': f"{start_year}:{end_year}",
    }

    all_data = []
    page = 1

    while True:
        params['page'] = page
        # Keep the try body to the calls that can raise a request error.
        # asyncio.TimeoutError is not an aiohttp.ClientError, so it is caught
        # explicitly: a timeout should keep earlier pages, like any other
        # request failure, instead of discarding them.
        try:
            async with session.get(url, params=params) as response:
                response.raise_for_status()
                data = await response.json()
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            logger.error(f"Error fetching data for {indicator}: {str(e)}")
            break

        # A usable payload is a two-element list: [metadata, records].
        # Anything else (error object, empty records) ends the pagination.
        if not isinstance(data, list) or len(data) < 2 or not data[1]:
            break

        meta, rows = data[0], data[1]
        all_data.extend(rows)

        # Prefer the page count reported in the response metadata; fall back
        # to the "short page means last page" heuristic when it is missing
        # or not an integer.
        try:
            total_pages = int(meta['pages'])
        except (KeyError, TypeError, ValueError):
            total_pages = None

        if total_pages is not None:
            is_last_page = page >= total_pages
        else:
            is_last_page = len(rows) < params['per_page']

        if is_last_page:
            break

        page += 1

    return process_world_bank_data(all_data, indicator)

def process_world_bank_data(data: List[dict], indicator: str) -> pd.DataFrame:
    """Turn raw World Bank observation records into an indexed DataFrame.

    Args:
        data: List of record dicts as returned by the API. Each record must
            provide ``country``, ``countryiso3code``, ``date`` and ``value``;
            the metadata fields ``indicator``, ``obs_status``, ``decimal`` and
            ``unit`` are discarded when present.
        indicator: Indicator code; used as the name of the value column.

    Returns:
        A DataFrame indexed by ``(country_name, country_code, year)`` and
        sorted by that index, with the numeric observations in a column named
        after ``indicator``. An empty DataFrame (and a logged warning) when
        ``data`` is empty.
    """
    if not data:
        logger.warning(f"No data retrieved for indicator: {indicator}")
        return pd.DataFrame()

    df = pd.DataFrame(data)

    # `country` arrives as a nested dict; keep only its display name.
    df['country_name'] = df['country'].apply(lambda x: x['value'] if isinstance(x, dict) else x)
    # Non-numeric / missing observations become NaN instead of raising.
    df['value'] = pd.to_numeric(df['value'], errors='coerce')
    df['date'] = pd.to_datetime(df['date'], format='%Y')

    # errors='ignore': a record set that lacks one of these optional metadata
    # fields must not abort processing with a KeyError.
    df = df.drop(columns=['indicator', 'obs_status', 'decimal', 'country', 'unit'], errors='ignore')
    df = df.rename(columns={'countryiso3code': 'country_code', 'date': 'year', 'value': indicator})

    return df.set_index(['country_name', 'country_code', 'year']).sort_index()

async def get_world_bank_data_async(session: aiohttp.ClientSession, indicator: str, countries: Union[str, List[str]], start_year: int = 1960, end_year: Union[int, None] = None) -> pd.DataFrame:
    """Asynchronously fetch and process one indicator, never raising.

    Thin safety wrapper around ``fetch_world_bank_data_async``.

    Args:
        session: Open aiohttp session forwarded to the fetcher.
        indicator: World Bank indicator code.
        countries: A single country-code string or a list of codes.
        start_year: First year of the requested date range.
        end_year: Last year of the requested range; ``None`` lets the fetcher
            pick its default.

    Returns:
        The fetched DataFrame, or an empty DataFrame if any exception was
        raised while fetching or processing (the exception is logged with
        its traceback).
    """
    try:
        df = await fetch_world_bank_data_async(session, indicator, countries, start_year, end_year)
    except Exception as e:
        logger.exception(f"Error retrieving data for {indicator}: {str(e)}")
        return pd.DataFrame()

    # The fetcher logs request errors itself and then returns an empty frame,
    # so an empty result must not be reported as a success.
    if not df.empty:
        logger.info(f"Successfully retrieved data for {indicator}")
    return df

async def fetch_all_indicators(indicators: dict, countries: List[str]) -> dict:
    """Fetch data for all indicators concurrently.

    Args:
        indicators: Mapping of category name -> list of World Bank indicator
            codes. Only the codes are used; categories are ignored here.
        countries: Country codes forwarded to every request.

    Returns:
        Dict keyed by each indicator's standard name (looked up in the
        module-level ``indicator_names``; the raw code is used when no name
        is registered), whose values are the fetched DataFrames with their
        value column renamed to that standard name. Indicators for which
        nothing could be retrieved map to an empty DataFrame.
    """
    # Flatten once, so the task order and the result order below are both
    # derived from the same list.
    codes = [code for code_list in indicators.values() for code in code_list]

    async with aiohttp.ClientSession() as session:
        results = await asyncio.gather(
            *(get_world_bank_data_async(session, code, countries) for code in codes)
        )

    data = {}
    for code, df in zip(codes, results):
        standard_name = indicator_names.get(code, code)
        # rename() is a no-op on an empty frame, whereas assigning to
        # `df.columns` raises a length-mismatch ValueError there and would
        # abort the whole batch because of a single failed indicator.
        data[standard_name] = df.rename(columns={code: standard_name})

    return data


In [None]:
# Run the asynchronous data fetching for every code in `indicators`, using
# the notebook-level `countries` list. `data` maps each indicator's standard
# name to its fetched DataFrame.
data = asyncio.run(fetch_all_indicators(indicators, countries))

# Update the indicators dictionary to use standard names:
# same categories as `indicators`, but each code is replaced by its
# human-readable name so the lists line up with the keys of `data`.
indicators_standard = {category: [indicator_names[ind] for ind in indicator_list] 
                       for category, indicator_list in indicators.items()}