In [2]:
import asyncio
from pprint import pprint as print

import httpx
import pandas as pd
from tqdm.asyncio import tqdm_asyncio

from dfpp.geo_utils import get_iso3_to_official_name_map
from dfpp.publishing import publish_series
from dfpp.transformation.sources.imf_org.retrieve import (
    list_indicators,
    get_indicator_data,
)
from dfpp.transformation.sources.imf_org.transform import transform

# Cap handed to httpx.Limits(max_connections=...) on the shared HTTP client.
MAX_CONCURRENCY = 10
# Passed as httpx.AsyncClient(timeout=...); httpx interprets a bare number as seconds.
HTTP_REQUEST_TIMEOUT = 120

In [3]:
# Lookup handed to transform() as its third argument. Judging by the helper's
# name it maps ISO-3 codes to official country names — confirm in dfpp.geo_utils.
ISO_3_MAP = await get_iso3_to_official_name_map()
# Indicator catalogue; each row is later turned into a dict whose "id" and
# "label" keys are read by process_one_indicator().
df_indicators = list_indicators()

In [4]:
async def process_all_indicators(df_indicators: pd.DataFrame) -> list:
    """Run ``process_one_indicator`` for every row of ``df_indicators``.

    All indicators share one ``httpx.AsyncClient``. At most
    ``MAX_CONCURRENCY`` indicators are processed at the same time, so a
    request never has to queue for a pooled connection while the client's
    timeout is already counting down.

    Args:
        df_indicators: One row per indicator. Rows are converted with
            ``to_dict(orient="records")`` and passed on as plain dicts.

    Returns:
        A list of ``(indicator_id, exception)`` tuples, one per indicator
        that failed. The list is empty when every indicator succeeded.
    """
    failed_indicators = []
    indicators = df_indicators.to_dict(orient="records")
    # Waiting on the semaphore has no deadline. Waiting for a free pooled
    # connection does: the single ``timeout`` value below also applies to
    # pool acquisition, so an unbounded fan-out can end in PoolTimeout.
    semaphore = asyncio.Semaphore(MAX_CONCURRENCY)
    async with httpx.AsyncClient(
        timeout=HTTP_REQUEST_TIMEOUT,
        limits=httpx.Limits(max_connections=MAX_CONCURRENCY),
    ) as http_client:

        async def _process_bounded(indicator: dict):
            """Process one indicator once a concurrency slot is free."""
            async with semaphore:
                return await process_one_indicator(http_client, indicator)

        tasks = [_process_bounded(indicator) for indicator in indicators]
        for future in tqdm_asyncio.as_completed(tasks):
            result = await future
            # A 2-tuple is the failure report; successful runs return None.
            if isinstance(result, tuple) and len(result) == 2:
                print(result)
                failed_indicators.append(result)
    return failed_indicators


async def process_one_indicator(http_client: httpx.AsyncClient, indicator: dict):
    """Download, transform and publish the series for a single indicator.

    Args:
        http_client: Shared client forwarded to ``get_indicator_data``.
        indicator: Indicator record; its ``"id"`` and ``"label"`` keys are
            read here and the whole dict is forwarded to ``transform``.

    Returns:
        ``None`` on success. On any failure a ``(indicator_id, exception)``
        tuple is returned instead of raising, so that one bad indicator does
        not abort the others. ``indicator_id`` is ``None`` when the record
        has no ``"id"`` key.
    """
    # Resolved before the ``try`` so the failure tuple can always be built,
    # even when the record itself is malformed.
    indicator_id = indicator.get("id")
    try:
        if indicator_id is None:
            raise KeyError("id")
        df_source = await get_indicator_data(http_client, indicator_id)
        print(indicator["label"])
        # Work on a copy so the frame returned by the retriever is left alone.
        df = df_source.copy()
        # Explicit check instead of ``assert``: asserts vanish under ``-O``.
        if df.shape[0] == 0:
            raise ValueError("DataFrame is empty")
        df_final = transform(df, indicator, ISO_3_MAP)
        await publish_series(indicator_id, df_final, source_folder="imf_datamapper_api")
    except Exception as e:
        return indicator_id, e
    return None

In [None]:
# Run the pipeline over the full catalogue; the result is the list of
# failures collected by process_all_indicators().
failed_series = await process_all_indicators(df_indicators)

In [6]:
# Fail the run when any indicator could not be processed. The message is built
# as a string because `print` is rebound to pprint here, which returns None and
# would leave the AssertionError without any detail.
assert len(failed_series) == 0, (
    f"{len(failed_series)} indicator(s) failed: {failed_series}"
)