In [None]:
import asyncio
from tqdm.asyncio import tqdm_asyncio
import pandas as pd
from pprint import pprint as print

from dfpp.sources.who_azureedge_net.retrieve import (
    list_indicators,
    get_indicator_data,
    get_all_dimension_values,
    list_dimensions,
)
from dfpp.sources.who_azureedge_net.transform import (
    process_dimension_data,
    transform_indicator,
    SOURCE_NAME,
)
from dfpp.publishing.publish import publish_series

# Maximum number of indicators processed at the same time; used as the default
# semaphore size in process_all_indicators.
MAX_CONCURRENCY = 1

In [None]:
# Load whatever list_indicators() returns into a DataFrame; each row is later
# turned into one indicator record and looked up by its "IndicatorCode" field.
df_indicators = pd.DataFrame(list_indicators())

In [None]:
# Load the result of list_dimensions() into a DataFrame; its "Code" column is
# the left-hand join key when the dimension map is built.
df_dimension_codes = pd.DataFrame(list_dimensions())

In [None]:
# Top-level await: only valid in a notebook / async REPL. The result is used as
# the right-hand side of the dimension merge, joined on its "Dimension" column.
df_dimension_values = await get_all_dimension_values(df_dimension_codes)

In [None]:
# Left join: every row of df_dimension_codes is kept and paired with the rows of
# df_dimension_values whose "Dimension" equals its "Code". Column names present
# on both sides are disambiguated with the "_dimension" / "_value" suffixes.
df_full_dimension_map = pd.merge(
    df_dimension_codes,
    df_dimension_values,
    how="left",
    left_on="Code",
    right_on="Dimension",
    suffixes=("_dimension", "_value"),
)

In [None]:
# Post-process the merged dimension table with the source-specific helper.
# NOTE(review): this cell rebinds the same name it reads, so re-running it
# without first re-running the merge cell applies process_dimension_data to
# already-processed data — confirm the helper tolerates that, or re-run the
# merge cell first.
df_full_dimension_map = process_dimension_data(df_full_dimension_map)

In [None]:
async def process_indicator(
    indicator: dict, semaphore: asyncio.Semaphore, df_full_dimension_map: pd.DataFrame
):
    """Fetch, transform and publish one indicator, reporting failure as a value.

    Args:
        indicator: Indicator record; must contain an "IndicatorCode" key. The
            whole record is also passed to ``transform_indicator``.
        semaphore: Shared semaphore held for the entire fetch/transform/publish
            sequence, so it bounds how many indicators are in flight at once.
        df_full_dimension_map: Dimension table passed through unchanged to
            ``transform_indicator``.

    Returns:
        ``None`` when the indicator was published, otherwise a 2-tuple
        ``(indicator_code, exception)`` describing why it was not.

    Raises:
        KeyError: If ``indicator`` has no "IndicatorCode" key. The lookup
            happens before the ``try`` block, so it is not converted into a
            failure tuple.
    """
    # Resolved once, outside the try block, so the code is always available
    # when building the failure tuple.
    indicator_code = indicator["IndicatorCode"]
    try:
        async with semaphore:
            indicator_data = await get_indicator_data(indicator_code)

            df_source = pd.DataFrame(indicator_data["value"])
            if df_source.empty:
                raise ValueError("No data to transform")

            # The transform receives a copy, leaving the raw frame untouched.
            df = transform_indicator(indicator, df_source.copy(), df_full_dimension_map)
            if df.empty:
                raise ValueError("No transformed data after the transform is performed")

            await publish_series(indicator_code, df, source_folder=SOURCE_NAME)
    except Exception as e:
        # Deliberate best-effort: one bad indicator must not abort the batch,
        # so the error is handed back to the caller instead of being raised.
        return indicator_code, e
    return None

In [None]:
async def process_all_indicators(
    df_indicators: pd.DataFrame,
    df_full_dimension_map: pd.DataFrame,
    max_concurrency: int = MAX_CONCURRENCY,
):
    """Process every non-archived indicator and collect the failures.

    Args:
        df_indicators: One row per indicator; must have an "IndicatorCode"
            column. Rows whose code ends with "_ARCHIVED" are skipped.
        df_full_dimension_map: Dimension table forwarded to each
            ``process_indicator`` call.
        max_concurrency: Size of the semaphore shared by all indicator tasks;
            must be at least 1.

    Returns:
        A list of ``(indicator_code, exception)`` tuples, one per indicator
        that failed, in completion order. Empty when everything succeeded.

    Raises:
        ValueError: If ``max_concurrency`` is smaller than 1.
    """
    # A semaphore of size 0 can never be acquired, which would leave every
    # task (and the progress bar) waiting forever; fail fast instead.
    if max_concurrency < 1:
        raise ValueError(f"max_concurrency must be >= 1, got {max_concurrency}")

    semaphore = asyncio.Semaphore(max_concurrency)
    indicators = df_indicators.to_dict(orient="records")

    tasks = [
        process_indicator(indicator, semaphore, df_full_dimension_map)
        for indicator in indicators
        if not indicator["IndicatorCode"].endswith("_ARCHIVED")
    ]

    failed_indicators = []
    for future in tqdm_asyncio.as_completed(tasks):
        result = await future
        # Any 2-tuple result is treated as a failure report and recorded.
        if isinstance(result, tuple) and len(result) == 2:
            print(result)
            failed_indicators.append(result)
    return failed_indicators

In [None]:
# Run the whole pipeline with the default concurrency (MAX_CONCURRENCY) and keep
# the (indicator_code, exception) tuples of every indicator that failed.
failed_indicators = await process_all_indicators(df_indicators, df_full_dimension_map)

In [None]:
# Fail the notebook run if any indicator could not be processed. The message is
# built as a string: `print` is rebound to pprint.pprint in this notebook, and
# pprint returns None, which would leave the AssertionError without a message.
assert len(failed_indicators) == 0, (
    f"{len(failed_indicators)} indicator(s) failed: {failed_indicators!r}"
)