In [None]:
cd ../../../../

In [26]:
import asyncio

from pprint import pprint as print
import asyncio
import pandas as pd
from tqdm.asyncio import tqdm_asyncio
import pandas as pd
from tqdm.asyncio import tqdm_asyncio

from dfpp.transformation.source_notebooks.ilo_org.geo_utils import get_iso3_map
from dfpp.transformation.source_notebooks.ilo_org.retrieve import (
    list_indicators,
    get_codebook,
    get_indicator,
)
from dfpp.transformation.source_notebooks.ilo_org.transform import (
    sanitize_categories,
    transform_indicator,
)
from dfpp.transformation.source_notebooks.ilo_org.publish import publish_indicator

# Upper bound on how many indicators are processed at the same time; it sizes
# the asyncio.Semaphore created in process_all_indicators.
MAX_CONCURRENCY = 1

In [27]:
# Top-level await: relies on IPython's autoawait support in notebook cells.
# The resulting map is forwarded to transform_indicator for every indicator.
ISO_3_MAP = await get_iso3_map()

In [None]:
# Load the codebook tables. The nine names are bound positionally, so their
# order here has to match the order in which get_codebook() yields them.
codebook_tables = get_codebook()
(df_ref_area, df_indicator, df_sex,
 df_classif1, df_classif2, df_obs_status,
 df_note_classif, df_note_indicator, df_note_source) = codebook_tables

In [5]:
# Rebind both classification tables to their sanitized versions. The inputs
# are overwritten, so re-running this cell feeds already-sanitized tables back in.
# NOTE(review): confirm sanitize_categories is idempotent, or re-run the
# codebook cell before re-running this one.
df_classif1, df_classif2 = sanitize_categories(df_classif1, df_classif2)

In [6]:
# Build the indicator catalogue and keep only rows whose frequency code is "A"
# (presumably "annual", going by the variable name — TODO confirm).
indicator_list = list_indicators()
df_indicators = pd.DataFrame(indicator_list)
is_annual = df_indicators["freq"] == "A"
df_annual_indicators = df_indicators.loc[is_annual]

# Downstream cells treat each row as one indicator, so ids must not repeat.
records_per_id = df_annual_indicators["id"].value_counts()
assert records_per_id.max() == 1, "Each indicator must have one record"

In [None]:
for indicator in df_annual_indicators.to_dict(orient="records"):
    try:
        indicator_id = indicator["id"]
        data = await get_indicator(indicator["id"])

        df_source = pd.DataFrame(data)
        assert df_source.shape[0] > 0, f"The {indicator_id} DataFrame is empty"

        assert (
            df_source.shape[0] == indicator["n_records"]
        ), "Shape mismatch with expected dimensions"
        print(indicator["indicator_label"])

        df = df_source.copy()

        df_indicator = transform_indicator(df, df_classif1, df_classif2, ISO_3_MAP)

        await publish_indicator(indicator_id, df_indicator)

    except Exception as e:
        return indicator_id, e

In [32]:
async def process_indicator(semaphore, indicator, df_classif1, df_classif2, ISO_3_MAP):
    """Fetch, validate, transform and publish a single indicator.

    Args:
        semaphore: asyncio semaphore limiting concurrent work; it is held for
            the whole fetch-validate-transform-publish sequence.
        indicator: mapping describing the indicator. The keys "id",
            "n_records" and "indicator_label" are read.
        df_classif1: forwarded unchanged to transform_indicator.
        df_classif2: forwarded unchanged to transform_indicator.
        ISO_3_MAP: forwarded unchanged to transform_indicator.

    Returns:
        None when the indicator was published; otherwise a tuple
        ``(indicator_id, exception)``. ``indicator_id`` is None when the
        failure happened before the id could be read.
    """
    # Bound before the try block: if the "id" lookup itself raises, the except
    # handler must still be able to build the failure tuple instead of dying
    # with UnboundLocalError.
    indicator_id = None
    try:
        indicator_id = indicator["id"]

        async with semaphore:
            data = await get_indicator(indicator_id)

            df_source = pd.DataFrame(data)
            assert df_source.shape[0] > 0, f"The {indicator_id} DataFrame is empty"

            # The fetched row count may differ from the catalogue's n_records
            # by at most one row before it is treated as a mismatch.
            assert (
                abs(df_source.shape[0] - indicator["n_records"]) < 2
            ), f"Shape mismatch with expected dimensions expected {indicator['n_records']} but got {df_source.shape[0]}"
            print(indicator["indicator_label"])

            # Transform a copy so df_source stays as fetched.
            df = df_source.copy()

            df_indicator = transform_indicator(df, df_classif1, df_classif2, ISO_3_MAP)

            await publish_indicator(indicator_id, df_indicator)

    except Exception as e:
        # Failures are reported to the caller as values rather than raised, so
        # one bad indicator does not abort the rest of the batch.
        return indicator_id, e


async def process_all_indicators(
    df_annual_indicators, df_classif1, df_classif2, ISO_3_MAP, limit=5
):
    """Run process_indicator over the selected indicators and collect failures.

    Args:
        df_annual_indicators: DataFrame with one row per indicator; each row is
            converted to a dict and handed to process_indicator.
        df_classif1: forwarded unchanged to process_indicator.
        df_classif2: forwarded unchanged to process_indicator.
        ISO_3_MAP: forwarded unchanged to process_indicator.
        limit: number of trailing rows to process. The default of 5 reproduces
            the previous hard-coded ``.tail()`` call; pass ``None`` to process
            every row.

    Returns:
        List of ``(indicator_id, exception)`` tuples, one per failed
        indicator. Empty when every indicator succeeded.
    """
    semaphore = asyncio.Semaphore(MAX_CONCURRENCY)

    if limit is None:
        selected = df_annual_indicators
    else:
        selected = df_annual_indicators.tail(limit)
    indicators = selected.to_dict(orient="records")

    tasks = [
        process_indicator(semaphore, indicator, df_classif1, df_classif2, ISO_3_MAP)
        for indicator in indicators
    ]

    failed_indicators = []
    for future in tqdm_asyncio.as_completed(tasks):
        result = await future
        # process_indicator returns None on success and a 2-tuple on failure.
        if isinstance(result, tuple) and len(result) == 2:
            failed_indicators.append(result)

    return failed_indicators

In [None]:
# Run the concurrent pipeline. The result is a list of (indicator_id, exception)
# pairs for the indicators that failed; it is empty when all of them succeeded.
failed_indicators = await process_all_indicators(
    df_annual_indicators, df_classif1, df_classif2, ISO_3_MAP
)

In [None]:
# Fail the notebook run if any indicator could not be processed.
# `print` is bound to pprint in this notebook and returns None, so it cannot
# serve as the assert message; show the failures first, then raise with a
# message that states how many there were.
if failed_indicators:
    print(failed_indicators)
assert not failed_indicators, f"{len(failed_indicators)} indicator(s) failed to process"