In [None]:
from io import StringIO
import json
import numpy as np
import pandas as pd
import pandasdmx as pdsdmx
import re
import requests

In [None]:
# API headers (compress response)
comp_head = {"Accept-Encoding": "gzip"}
# call to Unicef WH
Unicef = pdsdmx.Request("UNICEF", backend="memory", headers=comp_head)

agency = "ECARO"
# dataflow
dfd = "TRANSMONEE"
# data structure definition id, derived from agency and dataflow
dsd_name = f"DSD_{agency}_{dfd}"
# call to TMEE DSD
Dsd_tm = Unicef.datastructure(dsd_name, provider=agency)

# indicators dictionary in DSD response: components position 1
# (the structure is looked up through dsd_name, so editing agency/dfd above
# cannot leave a stale hard-coded key behind)
tm_indicators = (
    Dsd_tm.structure[dsd_name].dimensions.components[1]
    .local_representation.enumerated.items
)
# code to label access as:
# tm_indicators[code].name.localizations["en"]

In [None]:
# Base URL of the REST data endpoint. No trailing slash: the requests in this
# notebook build their URL as f"{unicef_api_url}/{agency},{dfd}/...", so a
# trailing slash here would yield ".../data//ECARO,..." (double slash).
unicef_api_url = "https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data"
# query-string parameters sent with the series-keys requests
rest_param = {
    "detail": "serieskeysonly",
    "format": "sdmx-csv",
    "labels": "id",
}
# rest_head = {
#     **comp_head,
#     "Accept": "application/vnd.sdmx.data+csv;version=1.0.0",
# }

# tm dimensions
tm_db_dim = ["SEX", "AGE", "RESIDENCE", "WEALTH_QUINTILE"]

# standard disaggregation
# ("fill me" is the placeholder used for codes that have to be completed by hand)
std_disagg = {
    "SEX": ["F", "M"],
    "AGE": ["fill me"],
    "RESIDENCE": ["R", "U"],
    "WEALTH_QUINTILE": [f"Q{i}" for i in range(1, 6)],
}

In [None]:
# loop on indicators
# For every indicator, request its series keys and build the dot-separated key
# fragments (one position per dimension in tm_db_dim) used to query it.
# Resulting entry in ind_query_dict:
#   None                      -> the series-keys request did not answer 200
#   {"TOTAL": "..."}          -> no dimension in tm_db_dim has more than one code
#   {dim: ..., "TOTAL": ...}  -> one fragment per disaggregated dimension + total
# "fill me" marks a code that could not be derived and must be completed by hand.
ind_query_dict = {}
for key in tm_indicators:
    ind_keys = requests.get(
        url=f"{unicef_api_url}/{agency},{dfd}/.{key}....",
        params=rest_param,
        headers=comp_head,
    )

    # request not satisfactory: flag the indicator and move to the next one
    if ind_keys.status_code != 200:
        ind_query_dict[key] = None
        continue

    keys_df = pd.read_csv(StringIO(ind_keys.text))
    # disagg: dimensions showing more than one code for this indicator
    dim_codes = {dim: keys_df[dim].unique() for dim in tm_db_dim}
    disagg = [dim for dim in tm_db_dim if len(dim_codes[dim]) > 1]

    if not disagg:
        ind_query_dict[key] = {"TOTAL": "..."}
        continue

    keys_in_disagg = {dim: dim_codes[dim] for dim in disagg}
    total_not_in_disagg = {
        dim: "_T" not in keys_in_disagg[dim] for dim in disagg
    }

    # total codes if present in disaggregation
    total_codes = {
        dim: ("fill me" if total_not_in_disagg[dim] else "_T")
        for dim in disagg
    }

    # actual disaggregation
    # Kept local to the iteration: the module-level std_disagg is only read, so
    # one indicator's AGE codes never leak into the next one.
    actual_disagg = {}
    for dim in disagg:
        if dim == "AGE":
            if total_not_in_disagg["AGE"]:
                actual_disagg["AGE"] = ["fill me"]
            else:
                # every AGE code except the total; a plain filter avoids
                # np.delete with a boolean mask, which NumPy < 1.19 casts to
                # the integer indices 0/1
                actual_disagg["AGE"] = [
                    code for code in keys_in_disagg["AGE"] if code != "_T"
                ]
        elif all(code in keys_in_disagg[dim] for code in std_disagg[dim]):
            actual_disagg[dim] = std_disagg[dim]
        else:
            # at least one standard code is missing: to be completed by hand
            actual_disagg[dim] = ["fill me"]

    # use actual disaggregation in query: the disaggregated dimension lists its
    # codes joined by "+", the other disaggregated dimensions take their total
    # code and the remaining positions stay empty
    ind_query_dict[key] = {
        dim_disagg: ".".join(
            (
                "+".join(actual_disagg[dim])
                if dim == dim_disagg
                else total_codes[dim]
            ) if dim in disagg else ""
            for dim in tm_db_dim
        )
        for dim_disagg in disagg
    }

    # total if disaggregation query
    ind_query_dict[key]["TOTAL"] = ".".join(
        total_codes[dim] if dim in disagg else "" for dim in tm_db_dim
    )

In [None]:
# write the per-indicator query fragments to disk as indented JSON with sorted keys
with open('config_query.json', 'w') as file_write:
    json.dump(ind_query_dict, file_write, sort_keys=True, indent=4)
# print(json.dumps(ind_query_dict, indent=4, sort_keys=True))

In [None]:
# create dataframe to complete fillings
# One row per indicator that either still holds a "fill" placeholder in one of
# its queries (to_fill = "YES") or has an AGE query (to_fill = "NO"); each row
# lists the codes available for every dimension, stored as strings.
cols = ['Code', 'Name', 'SEX', 'AGE', 'RESIDENCE', 'WEALTH', 'to_fill']
to_fill_list = []

for key, queries in ind_query_dict.items():
    # indicators whose series-keys request failed carry no query to inspect
    if queries is None:
        continue

    if any("fill" in query for query in queries.values()):
        to_fill = "YES"
    elif "AGE" in queries:
        to_fill = "NO"
    else:
        continue

    ind_keys = requests.get(
        url=f"{unicef_api_url}/{agency},{dfd}/.{key}....",
        params=rest_param,
        headers=comp_head,
    )
    # fail with the HTTP error rather than on a missing column further down
    ind_keys.raise_for_status()
    keys_df = pd.read_csv(StringIO(ind_keys.text))

    row_in_df = [
        key,
        tm_indicators[key].name.localizations["en"],
        keys_df["SEX"].unique(),
        keys_df["AGE"].unique(),
        keys_df["RESIDENCE"].unique(),
        keys_df["WEALTH_QUINTILE"].unique(),
        to_fill,
    ]

    to_fill_list.append({col: str(value) for col, value in zip(cols, row_in_df)})

# built once from the records; yields an empty frame with the expected columns
# when no indicator needs action
df_to_fill = pd.DataFrame(to_fill_list, columns=cols)

In [None]:
# export the table of indicators to an Excel workbook, without the index column
excel_file = "indicators_to_action.xlsx"
df_to_fill.to_excel(excel_writer=excel_file, index=False)

In [None]:
# reshape config query json file
filled_json = "config_query_filled.json"
# the context manager closes the file handle as soon as the JSON is parsed
with open(filled_json) as file_to_read:
    filled_queries = json.load(file_to_read)

In [None]:
# Expand every filled query fragment into {query name: {dimension: [codes]}}.
# A fragment is dot-separated with one position per dimension in tm_db_dim;
# codes within a position are joined by "+", and empty positions are dropped.
# Skipped: indicators without queries (null in the JSON file) and indicators
# whose TOTAL query is the undisaggregated "...".
config_query_ind = {}
for key, queries in filled_queries.items():
    if queries is None or queries.get("TOTAL") == "...":
        continue

    config_query_ind[key] = {}
    for dim_disagg, query in queries.items():
        # split once per fragment instead of once per dimension lookup
        codes_by_position = query.split(".")
        config_query_ind[key][dim_disagg] = {
            dim: codes_by_position[i].split("+")
            for i, dim in enumerate(tm_db_dim)
            if codes_by_position[i] != ""
        }

In [None]:
# write the reshaped per-indicator configuration as indented JSON with sorted keys
with open('config_query_ind.json', 'w') as file_write:
    json.dump(config_query_ind, file_write, sort_keys=True, indent=4)

In [None]:
# indicator check on units (using a config call: no possible call on attributes)
# same parameters as the series-keys call, but asking for full detail and only
# the last observation
rest_param_last = {
    **rest_param,
    "lastNObservations": "1",
    "detail": "full",
}

# hard-coded accepted non-numerics: indicators with observations containing "<|>"
ind_accept_non_num = [
    'HVA_EPI_INF_ANN_15-24',
    'HVA_EPI_INF_RT_0-14',
    'HVA_EPI_INF_RT_10-19',
    'HVA_EPI_LHIV_0-19',
    'HVA_EPI_LHIV_15-24',
    'HVA_PED_ART_CVG',
    'HVA_PMTCT_ARV_CVG',
    'MG_INTNL_MG_CNTRY_DEST_PS',
]

for key, queries in filled_queries.items():
    # indicators without queries (null in the JSON file) cannot be called
    if queries is None:
        continue

    # prefer the TOTAL query, otherwise the first query available
    db_call = "TOTAL" if "TOTAL" in queries else next(iter(queries))
    ind_last_1 = requests.get(
        url=f"{unicef_api_url}/{agency},{dfd}/.{key}.{queries[db_call]}",
        params=rest_param_last,
        headers=comp_head,
    )
    # fail with the HTTP error rather than on a missing column further down
    ind_last_1.raise_for_status()
    ind_last_df = pd.read_csv(StringIO(ind_last_1.text))

    # only the first unit found is inspected; a response without rows has none
    units = ind_last_df["UNIT_MEASURE"].unique()
    if len(units) > 0 and units[0] == "BINARY":
        dtype = "BINARY"
    elif key in ind_accept_non_num:
        dtype = "NUM:<|>"
    else:
        continue

    # create the indicator entry when missing, then tag its data type
    config_query_ind.setdefault(key, {})["DTYPE"] = dtype

In [None]:
# with open('config_query_ind.json', 'w') as file_write:
#     json.dump(config_query_ind, indent=4, sort_keys=True, fp=file_write)

In [None]:
# key = "PT_CHLD_Y0T4_REG"
# ind_keys = requests.get(
#     url=f"{unicef_api_url}/{agency},{dfd}/.{key}..._T._T",
#     params=rest_param,
#     headers=comp_head,
# )
# keys_df = pd.read_csv(StringIO(ind_keys.text))
# len(keys_df.REF_AREA.unique())

In [None]:
# # rename column names: only code
    # cols = keys_df.columns.values
    # ren_dict = {k: v.split(":")[0] for k, v in zip(cols, cols)}
    # keys_df.rename(columns=ren_dict, inplace=True)
    # # retain codes only in dimension columns
    # for dim in tm_db_dim:
    #     keys_df.loc[:, dim] = keys_df[dim].apply(lambda x: x.split(":")[0])

In [None]:
            # std_disagg.update({
            #     "AGE": list(
            #         np.delete(keys_in_disagg["AGE"], keys_in_disagg["AGE"] == "_T")
            #     ) if ("AGE" in disagg) and all(total_in_disagg)
            #     else []
            # })
            # ind_query_dict[key] = {"TOTAL": []}
            # ind_query_dict[key].update({dim: [] for dim in disagg})
            
            # if all(total_in_disagg):
            #     ind_query_dict[key].update({
            #         "TOTAL": ".".join(
            #             ["_T" if dim in disagg else "" for dim in tm_db_dim]
            #         )
            #     })
            #     ind_query_dict[key].update({
            #         dim_disagg: ".".join(
            #             ["_T" if dim in disagg else "" for dim in tm_db_dim]
            #         )
            #     })

                # if "AGE" in disagg:
                #     ind_query_dict[key]["AGE"] = list(
                #         np.delete(
                #             keys_in_disagg["AGE"],
                #             keys_in_disagg["AGE"] == "_T"
                #         )
                #     )
                
                # else:
                #     ind_query_dict[key]["TOTAL"] = []

            # check_std_disagg = {
            #     dim: [code in keys_in_disagg[dim] for code in std_disagg[dim]]
            #     for dim in disagg
            #     if dim != "AGE"
            # }
            # check_disagg_concat = np.concatenate(list(check_std_disagg.values()))