<a href="https://colab.research.google.com/github/dohyung-kim/ccri/blob/main/script/adm0/sdmx_api_p2_data_download.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [66]:
import os
import xml.etree.ElementTree as ET

import geopandas as gpd
import pandas as pd
import requests

In [70]:
# Folder where the result CSV will be written.
# Surrounding whitespace is stripped because a pasted path often carries a trailing
# space or newline, which would otherwise point at a folder that does not exist.
outpath = input("Enter output save path:").strip()
# EG: /Users/kelseydoerksen/code/unicef/ccri/data/CCRI_results_misc

Enter output save path: /Users/kelseydoerksen/code/unicef/ccri/data/CCRI_results_misc


In [71]:
# File name of the CSV to create; it is joined onto the output save path at export time.
# Surrounding whitespace is stripped so a pasted name does not end up with stray spaces.
p2_var = input("Enter P2 save name:").strip()
# EG: P2_basic_hygiene.csv
# P2_under5_mortality.csv

Enter P2 save name: P2_under5_mortality.csv


In [72]:
# URL template for the SDMX query. It must contain the literal placeholder {iso3},
# which is replaced with each country's ISO3 code when the data is fetched.
# Whitespace and surrounding quotes are removed so the examples below can be pasted as-is.
api_url_template = input("Enter api path for files to query").strip().strip("'\"").strip()

# Without the placeholder every country would silently query the very same URL.
if "{iso3}" not in api_url_template:
    raise ValueError("The api path must contain the {iso3} placeholder, e.g. .../{iso3}.CME_MRY0T4..?format=sdmx-compact-2.1")

# EG: this is a api url for wash basic hygiene - replace for other p2 variables
#"https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/UNICEF,WASH_HOUSEHOLDS,1.0/{iso3}.WS_PPL_H-B.HYG._T._T?format=sdmx-compact-2.1"
# Under-5 Mortality 'https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/UNICEF,CME,1.0/{iso3}.CME_MRY0T4..?format=sdmx-compact-2.1'

Enter api path for files to query https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/UNICEF,CME,1.0/{iso3}.CME_MRY0T4..?format=sdmx-compact-2.1


In [74]:
# File path for the GeoJSON with the adm0 (country) boundaries.
# Surrounding whitespace is stripped because a pasted path often carries a trailing
# space or newline, which would make the file impossible to find.
geojson_file_path = input("Enter full filepath to adm0 global boundaries").strip()
#geojson_file_path = "/content/drive/MyDrive/CCRI/global_bnd_adm0.geojson"
# Kelsey local: /Users/kelseydoerksen/code/unicef/ccri/data/misc/adm0_boundaries_simple.geojson

Enter full filepath to adm0 global boundaries /Users/kelseydoerksen/code/unicef/ccri/data/misc/adm0_boundaries_simple.geojson


In [77]:
# Function to fetch data for a single country using the UNICEF dataflow endpoint
def fetch_country_data(iso3, api_url_template, timeout=30):
    """
    Fetch the most recent observation for one country from an SDMX endpoint.

    Parameters
    ----------
    iso3 : str
        ISO3 country code substituted for the ``{iso3}`` placeholder.
    api_url_template : str
        URL template containing an ``{iso3}`` placeholder.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so that a stalled connection cannot hang
        the whole download loop. Defaults to 30 seconds.

    Returns
    -------
    dict or None
        A dictionary with the keys ``iso3``, ``time_period``, ``obs_value`` and
        ``data_source`` describing the observation with the largest
        ``TIME_PERIOD`` in the first ``Series`` element of the response.
        ``None`` when the request fails, the status is not 200, the XML cannot
        be parsed, or no usable observation exists; the reason is printed.
    """
    # Construct URL using Python's format method
    api_url = api_url_template.format(iso3=iso3)
    try:
        response = requests.get(api_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to fetch data for {iso3}. HTTP Status: {response.status_code}")
            return None
        return _latest_observation(iso3, response.content)
    except Exception as e:
        # Deliberately broad: one failing country (network error, timeout, ...)
        # must be reported and skipped, not abort the whole download loop.
        print(f"Error processing {iso3}: {e}")
        return None


def _latest_observation(iso3, xml_bytes):
    """Parse an SDMX compact payload and return its most recent observation, or None."""
    try:
        # Parse XML using fromstring (handles encoding better)
        root = ET.fromstring(xml_bytes)
    except ET.ParseError as pe:
        print(f"XML parsing error for {iso3}: {pe}")
        return None

    # The Series element is not namespaced in this response
    all_series = root.findall(".//Series")
    if not all_series:
        print(f"No Series element found for {iso3}")
        return None
    if len(all_series) > 1:
        # A query with wildcard dimensions can return several series; make the
        # silent choice of the first one visible to the user.
        print(f"Note: {len(all_series)} Series elements returned for {iso3}; only the first is used")

    obs_list = all_series[0].findall("Obs")
    if not obs_list:
        print(f"No observation elements found for {iso3}")
        return None

    data = []
    for obs in obs_list:
        try:
            time_period = int(obs.get("TIME_PERIOD"))
            obs_value = float(obs.get("OBS_VALUE"))
        except (TypeError, ValueError) as conv_err:
            # TypeError: the attribute is missing (None); ValueError: non-numeric text
            print(f"Conversion error for {iso3}: {conv_err}")
            continue
        data.append({
            "iso3": iso3,
            "time_period": time_period,
            "obs_value": obs_value,
            "data_source": obs.get("DATA_SOURCE")
        })

    if not data:
        print(f"No valid observation data for {iso3}")
        return None

    # Choose the most recent record based on the time period
    return max(data, key=lambda x: x["time_period"])

# Load the GeoJSON file containing country boundaries and ISO3 codes
countries_gdf = gpd.read_file(geojson_file_path)

# Verify the GeoDataFrame columns to ensure the ISO3 field exists
print("GeoDataFrame columns:", countries_gdf.columns)

# Extract the distinct ISO3 codes (update the field name if your boundaries file
# does not call it exactly 'ISO3'). Features without a code are dropped first so
# that no request is ever built for a missing value.
iso3_codes = countries_gdf['ISO3'].dropna().unique()

# Each code replaces the {iso3} placeholder of api_url_template when the data is fetched.

GeoDataFrame columns: Index(['id', 'Alternativ', 'ISO3', 'Map label', 'MapClr', 'UcodeIn', 'WFP_ID',
       'cnpt_uuid', 'default', 'end_date', 'is_latest', 'level', 'level_name',
       'name', 'name_en', 'start_date', 'type', 'ucode', 'uuid', 'geometry'],
      dtype='object')


In [81]:
# Collects one record (the fetched observation) per country that returned data
all_data = []

# Query every ISO3 code in turn; countries without a result are reported and skipped
for iso3 in iso3_codes:
    print(f"Processing {iso3}...")
    result = fetch_country_data(iso3, api_url_template)
    if not result:
        print(f"No data found for {iso3}")
        continue
    print(f"Data for {iso3}: {result}")
    all_data.append(result)

Processing ATA...
Failed to fetch data for ATA. HTTP Status: 404
No data found for ATA
Processing AIA...
Data for AIA: {'iso3': 'AIA', 'time_period': 2023, 'obs_value': 5.8644762772405, 'data_source': None}
Processing ESH...
Failed to fetch data for ESH. HTTP Status: 404
No data found for ESH
Processing PYF...
Failed to fetch data for PYF. HTTP Status: 404
No data found for PYF
Processing NCL...
Failed to fetch data for NCL. HTTP Status: 404
No data found for NCL
Processing BMU...
Failed to fetch data for BMU. HTTP Status: 404
No data found for BMU
Processing GIB...
Failed to fetch data for GIB. HTTP Status: 404
No data found for GIB
Processing VGB...
Data for VGB: {'iso3': 'VGB', 'time_period': 2023, 'obs_value': 12.6102522275391, 'data_source': None}
Processing CYM...
Failed to fetch data for CYM. HTTP Status: 404
No data found for CYM
Processing FLK...
Failed to fetch data for FLK. HTTP Status: 404
No data found for FLK
Processing MSR...
Data for MSR: {'iso3': 'MSR', 'time_period': 

In [82]:
# If any data was collected, convert it to a DataFrame and export to CSV
if all_data:
    df = pd.DataFrame(all_data)
    # Create the output folder on demand, because to_csv fails when the target
    # directory does not exist. An empty path means "current directory", and
    # os.makedirs("") would raise, so it is skipped in that case.
    if outpath:
        os.makedirs(outpath, exist_ok=True)
    csv_file_path = os.path.join(outpath, p2_var)
    df.to_csv(csv_file_path, index=False)
    print(f"Data successfully exported to {csv_file_path}")
else:
    print("No data was fetched to export.")

Data successfully exported to /Users/kelseydoerksen/code/unicef/ccri/data/CCRI_results_misc/P2_under5_mortality.csv
