In [None]:
from io import StringIO
import json
import numpy as np
import pandas as pd
import pandasdmx as pdsdmx
import re
import requests

In [None]:
# API headers: ask the server to gzip-compress its responses
comp_head = {"Accept-Encoding": "gzip"}
# pandasdmx client for the "UNICEF" source; comp_head is passed as its headers.
# NOTE(review): backend="memory" presumably selects an in-memory response
# cache -- confirm against the pandasdmx documentation.
Unicef = pdsdmx.Request("UNICEF", backend="memory", headers=comp_head)

# agency id: used as the structure provider and in the data query path
agency = "BRAZIL_CO"
# dataflow id (same value as the agency id)
dfd = "BRAZIL_CO"
# data structure definition id, i.e. "DSD_BRAZIL_CO"
dsd_name = f"DSD_{agency}"
# fetch the DSD from the agency
Dsd_db = Unicef.datastructure(dsd_name, provider=agency)

# indicators dictionary in DSD response: items of the codelist attached to
# the dimension at components position 1.
# NOTE(review): relies on the indicator dimension sitting at index 1 of the
# DSD -- verify whenever the DSD changes.
db_indicators = (
    Dsd_db.structure[dsd_name].dimensions.components[1]
    .local_representation.enumerated.items
)
# code to label access as:
# db_indicators[code].name.localizations["en"]

In [None]:
# Base URL of the data endpoint queried with `requests`.
# NOTE(review): the value ends with "/" -- strip it (or omit the separator)
# when composing request paths, otherwise the path contains "//".
unicef_api_url = "https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/"
# Query-string parameters sent with every data request: only the series keys
# are requested (no observations), as SDMX-CSV, with ids instead of labels.
rest_param = {
    "detail": "serieskeysonly",
    "format": "sdmx-csv",
    "labels": "id",
}
# Unused alternative kept for reference: it would request CSV through an
# Accept header instead of the "format" query parameter above.
# rest_head = {
#     **comp_head,
#     "Accept": "application/vnd.sdmx.data+csv;version=1.0.0",
# }

# db dimensions: columns of the series-keys CSV inspected for disaggregation.
# Their order fixes the order of the "."-separated positions in the query
# fragments built from them.
db_dim = ["AGE", "EDUCATION_LEVEL"]

In [None]:
# Build, for every indicator code in db_indicators, the "."-separated key
# fragments (one per disaggregated dimension plus a "TOTAL") over db_dim.

# seconds; requests applies no timeout by default and would wait forever
REQUEST_TIMEOUT = 60


def _indicator_queries(keys_df, dims):
    """Return the query fragments for a single indicator.

    Parameters
    ----------
    keys_df : pandas.DataFrame
        Series keys of the indicator; must contain one column per entry
        of ``dims`` holding string codes.
    dims : list of str
        Dimension ids, in the order they appear in the query fragment.

    Returns
    -------
    dict
        ``{"TOTAL": "."}`` when no dimension has more than one code.
        Otherwise one entry per disaggregated dimension (all of its codes
        joined with "+", the other disaggregated dimensions pinned to their
        total code, non-disaggregated dimensions left empty) plus a
        ``"TOTAL"`` entry where every disaggregated dimension is pinned to
        its total code.
    """
    # distinct codes per dimension, computed once
    codes = {dim: keys_df[dim].unique().tolist() for dim in dims}
    disagg = [dim for dim in dims if len(codes[dim]) > 1]

    if not disagg:
        # sentinel value: the reshape step compares against exactly "."
        return {"TOTAL": "."}

    # total code if present in the disaggregation, else the first code seen
    total_codes = {
        dim: "_T" if "_T" in codes[dim] else codes[dim][0] for dim in disagg
    }

    # one fragment per disaggregated dimension, using its actual codes
    queries = {
        dim_disagg: ".".join(
            (
                "+".join(codes[dim])
                if dim == dim_disagg
                else total_codes[dim]
            ) if dim in disagg else ""
            for dim in dims
        )
        for dim_disagg in disagg
    }
    # fragment with every disaggregated dimension pinned to its total
    queries["TOTAL"] = ".".join(
        total_codes[dim] if dim in disagg else "" for dim in dims
    )
    return queries


ind_query_dict = {}
# indicator code -> HTTP status of the request that was not satisfactory
ind_skipped = {}
# the configured URL may end with "/"; avoid "//" in the request path
api_root = unicef_api_url.rstrip("/")

for key in db_indicators:
    ind_keys = requests.get(
        url=f"{api_root}/{agency},{dfd}/.{key}..",
        params=rest_param,
        headers=comp_head,
        timeout=REQUEST_TIMEOUT,
    )

    # keep track of unsatisfactory requests instead of dropping them silently
    if ind_keys.status_code != 200:
        ind_skipped[key] = ind_keys.status_code
        continue

    # dtype=str: codes must stay strings, numeric-looking codes would
    # otherwise be parsed as numbers and break the "+" join
    keys_df = pd.read_csv(StringIO(ind_keys.text), dtype=str)
    ind_query_dict[key] = _indicator_queries(keys_df, db_dim)

if ind_skipped:
    print(f"Skipped {len(ind_skipped)} indicator(s), non-200 status: {ind_skipped}")

In [None]:
# Save the per-indicator query fragments as indented, key-sorted JSON.
# To inspect without writing a file:
#     json.dumps(ind_query_dict, indent=4, sort_keys=True)
with open('config_query.json', 'w') as query_json:
    json.dump(ind_query_dict, query_json, indent=4, sort_keys=True)

In [None]:
# Reshape the query fragments into nested dicts:
#     indicator -> query name -> dimension -> list of codes
# Indicators whose only information is TOTAL == "." (no disaggregation)
# are left out; empty positions of a fragment are dropped.
config_query_ind = {}
for indicator, queries in ind_query_dict.items():
    if queries.get("TOTAL") == ".":
        continue

    reshaped = {}
    for query_name, fragment in queries.items():
        segments = fragment.split(".")
        codes_by_dim = {}
        for position, dim in enumerate(db_dim):
            segment = segments[position]
            if segment != "":
                codes_by_dim[dim] = segment.split("+")
        reshaped[query_name] = codes_by_dim

    config_query_ind[indicator] = reshaped

In [None]:
# Save the reshaped per-indicator configuration as indented, key-sorted JSON
with open('config_query_ind.json', 'w') as config_json:
    json.dump(config_query_ind, config_json, indent=4, sort_keys=True)