In [1]:
import pandas as pd

In [42]:
import asyncio
import time
from typing import List, Dict

import aiohttp
import nest_asyncio
import numpy as np
import pandas as pd

# A Jupyter kernel normally already has an asyncio event loop running, and
# asyncio.run() (used by get_world_bank_data below) refuses to start from inside
# a running loop. nest_asyncio patches asyncio so that nested use is permitted.
nest_asyncio.apply()

async def fetch_page(session: aiohttp.ClientSession, url: str) -> List[Dict]:
    """
    Request one URL and hand back its decoded JSON body.

    Args:
    session (aiohttp.ClientSession): Open session used to issue the GET request
    url (str): Fully-formed URL of the page to retrieve

    Returns:
    List[Dict]: The parsed JSON payload when the server answers HTTP 200;
    an empty list (after printing the status code) for any other status
    """
    async with session.get(url) as response:
        # Guard clause: anything other than 200 is reported and treated as "no data".
        if response.status != 200:
            print(f"Error fetching data: HTTP {response.status}")
            return []

        return await response.json()

async def fetch_world_bank_data(indicator: str) -> pd.DataFrame:
    """
    Fetch data for all years and all countries for a specified indicator from the World Bank API.

    Page 1 is requested first. Its metadata element reports the total number of
    pages, and the remaining pages are then requested concurrently instead of one
    after another. Pages are consumed in page order, and collection stops at the
    first page that carries no usable records, so a failed request still yields
    the contiguous prefix of pages fetched before it. If the metadata does not
    expose a page count, the function falls back to requesting pages sequentially
    until a page comes back shorter than ``per_page``.

    Args:
    indicator (str): The indicator code (e.g., 'NY.GDP.PCAP.CD' for GDP per capita)

    Returns:
    pd.DataFrame: A DataFrame containing the fetched data. Empty when no records
    were returned. Otherwise 'value' is numeric, 'date' is a datetime built from
    the year, 'country_name' holds the country label, and the 'indicator',
    'obs_status', 'decimal' and 'country' columns are removed.
    """
    base_url = "http://api.worldbank.org/v2/country/all/indicator/"
    per_page = 10000

    def page_url(page: int) -> str:
        # Build the request URL for one page of results.
        return f"{base_url}{indicator}?format=json&per_page={per_page}&page={page}"

    def records_of(payload) -> list:
        # A usable payload is [metadata, records]. An empty payload, a
        # one-element payload, or null/empty records all count as "no data".
        if not payload or len(payload) < 2 or not payload[1]:
            return []
        return payload[1]

    def declared_pages(payload):
        # Read the page count from the metadata element; None if unavailable.
        meta = payload[0]
        if not isinstance(meta, dict):
            return None
        try:
            return int(meta["pages"])
        except (KeyError, TypeError, ValueError):
            return None

    all_data = []
    async with aiohttp.ClientSession() as session:
        first_payload = await fetch_page(session, page_url(1))
        first_records = records_of(first_payload)

        if first_records:
            all_data.extend(first_records)
            total_pages = declared_pages(first_payload)

            if total_pages is not None:
                # gather() returns results in argument order, so pages stay in
                # sequence even though the requests run concurrently.
                remaining = await asyncio.gather(
                    *(fetch_page(session, page_url(p)) for p in range(2, total_pages + 1))
                )
                for payload in remaining:
                    records = records_of(payload)
                    if not records:
                        break
                    all_data.extend(records)
            else:
                # No page count available: keep the length-based stop condition.
                page = 2
                last_len = len(first_records)
                while last_len >= per_page:
                    records = records_of(await fetch_page(session, page_url(page)))
                    if not records:
                        break
                    all_data.extend(records)
                    last_len = len(records)
                    page += 1

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_data)

    if df.empty:
        return df

    # Process the DataFrame
    # 'country' arrives as a nested dict; keep only its human-readable label.
    df['country_name'] = df['country'].apply(lambda x: x['value'] if isinstance(x, dict) else x)
    df['value'] = pd.to_numeric(df['value'], errors='coerce')
    df['date'] = pd.to_datetime(df['date'].astype(str), format='%Y', errors='coerce')
    df = df.drop(columns=['indicator', 'obs_status', 'decimal', 'country'])

    return df

def get_world_bank_data(indicator: str) -> pd.DataFrame:
    """
    Synchronous entry point for fetch_world_bank_data.

    Args:
    indicator (str): The indicator code passed through to fetch_world_bank_data

    Returns:
    pd.DataFrame: Whatever fetch_world_bank_data produces for that indicator
    """
    coroutine = fetch_world_bank_data(indicator)
    return asyncio.run(coroutine)


In [45]:
# Test example: download GDP per capita for every country and time the call.
if __name__ == "__main__":
    indicator = "NY.GDP.PCAP.CD"  # GDP per capita

    # perf_counter() is a monotonic, high-resolution clock intended for measuring
    # elapsed time; unlike time.time() it is unaffected by system clock changes.
    start_time = time.perf_counter()
    df = get_world_bank_data(indicator)
    elapsed_time = time.perf_counter() - start_time

    print(f"Function execution time: {elapsed_time:.2f} seconds")

    print(df.head())

Function execution time: 0.65 seconds
  countryiso3code       date        value unit                 country_name
0             AFE 2023-01-01  1672.505957       Africa Eastern and Southern
1             AFE 2022-01-01  1642.432039       Africa Eastern and Southern
2             AFE 2021-01-01  1545.956697       Africa Eastern and Southern
3             AFE 2020-01-01  1356.088871       Africa Eastern and Southern
4             AFE 2019-01-01  1508.486886       Africa Eastern and Southern
