<a href="https://colab.research.google.com/github/dohyung-kim/ccri/blob/main/script/adm0/sdmx_api_p2_data_download.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import xml.etree.ElementTree as ET

import geopandas as gpd
import pandas as pd
import requests

# Directory the exported CSV is written to (joined with the file name at export time).
# NOTE(review): unlike geojson_file_path, this is not under /content/drive/MyDrive —
# confirm this matches the Drive mount layout before running.
outpath = "/content/drive/p2_vulnerability/"

# Function to fetch data for a single country using the UNICEF dataflow endpoint
def fetch_country_data(iso3, api_url_template, session=None, timeout=30):
    """
    Fetch data for a given country using its ISO3 code.

    Args:
        iso3: Country code substituted for the ``{iso3}`` placeholder.
        api_url_template: URL template containing an ``{iso3}`` placeholder.
        session: Optional object exposing ``get(url, timeout=...)`` (for
            example a ``requests.Session``) so that many calls can reuse one
            connection. Defaults to the module-level ``requests`` API.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the caller
            indefinitely.

    Returns:
        The most recent observation as a dictionary with the keys ``iso3``,
        ``time_period``, ``obs_value`` and ``data_source``, or None if the
        request fails, the response is not HTTP 200, or no usable observation
        is present. Every failure is reported with a printed message.
    """
    # Construct URL using Python's format method
    api_url = api_url_template.format(iso3=iso3)
    http = requests if session is None else session

    # Only the network call is guarded, so unrelated bugs are not swallowed.
    try:
        response = http.get(api_url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Error processing {iso3}: {e}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for {iso3}. HTTP Status: {response.status_code}")
        return None

    return _parse_latest_observation(iso3, response.content)


def _parse_latest_observation(iso3, xml_bytes):
    """Return the newest valid observation in an SDMX compact XML payload, or None."""
    try:
        # Parse from the raw bytes so the parser can apply the document's
        # own encoding declaration.
        root = ET.fromstring(xml_bytes)
    except ET.ParseError as pe:
        print(f"XML parsing error for {iso3}: {pe}")
        return None

    # The Series element is not namespaced in this response
    series = root.find(".//Series")
    if series is None:
        print(f"No Series element found for {iso3}")
        return None

    obs_list = series.findall("Obs")
    if not obs_list:
        print(f"No observation elements found for {iso3}")
        return None

    data = []
    for obs in obs_list:
        try:
            # A missing attribute yields None (TypeError); a non-numeric
            # value raises ValueError. Either way the observation is skipped.
            time_period = int(obs.get("TIME_PERIOD"))
            obs_value = float(obs.get("OBS_VALUE"))
        except (TypeError, ValueError) as conv_err:
            print(f"Conversion error for {iso3}: {conv_err}")
            continue
        data.append({
            "iso3": iso3,
            "time_period": time_period,
            "obs_value": obs_value,
            "data_source": obs.get("DATA_SOURCE"),
        })

    if not data:
        print(f"No valid observation data for {iso3}")
        return None

    # Choose the most recent record based on the time period
    return max(data, key=lambda record: record["time_period"])

# Driver script: read the country list from the boundary GeoJSON, query the
# UNICEF SDMX API once per ISO3 code, and export the latest observation of
# each country that returned data to a single CSV file.

# File path for the GeoJSON (update this path as needed)
geojson_file_path = "/content/drive/MyDrive/CCRI/global_bnd_adm0.geojson"

# Load the GeoJSON file containing country boundaries and ISO3 codes
countries_gdf = gpd.read_file(geojson_file_path)

# Verify the GeoDataFrame columns to ensure the ISO3 field exists
print("GeoDataFrame columns:", countries_gdf.columns)

# Extract ISO3 codes (update the field name if it's not exactly 'iso3').
# Missing codes are dropped so they are not sent to the API as the text "nan".
iso3_codes = countries_gdf['iso3'].dropna().unique()

# URL template for the WASH basic-hygiene indicator; replace it for other P2
# variables. {iso3} is a placeholder that is replaced with the actual ISO3 code.
api_url_template = (
    "https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/UNICEF,WASH_HOUSEHOLDS,1.0/{iso3}.WS_PPL_H-B.HYG._T._T?format=sdmx-compact-2.1"
)

# List to store fetched data for all countries
all_data = []

# Loop through ISO3 codes, fetch data and add it to the list if available
for iso3 in iso3_codes:
    print(f"Processing {iso3}...")
    result = fetch_country_data(iso3, api_url_template)
    if result is not None:
        print(f"Data for {iso3}: {result}")
        all_data.append(result)
    else:
        print(f"No data found for {iso3}")

# If any data was collected, convert it to a DataFrame and export to CSV
if all_data:
    df = pd.DataFrame(all_data)
    # Create the output directory first; to_csv does not create missing parents.
    os.makedirs(outpath, exist_ok=True)
    csv_file_path = os.path.join(outpath, "P2_basic_hygiene.csv")
    df.to_csv(csv_file_path, index=False)
    print(f"Data successfully exported to {csv_file_path}")
else:
    print("No data was fetched to export.")


GeoDataFrame columns: Index(['OBJECTID', 'iso3', 'adm0_name', 'map_lab', 'adm0_altnm', 'adm0_id',
       'mapclr', 'rb', 'disp_area', 'stscod', 'source', 'source_date',
       'lst_update', 'validity', 'Shape_Length', 'Shape_Area', 'geometry'],
      dtype='object')
Processing AFG...
Data for AFG: {'iso3': 'AFG', 'time_period': 2022, 'obs_value': 48.21469648071023, 'data_source': 'WHO/UNICEF Joint Monitoring Programme (2024)'}
Processing xAC...
Failed to fetch data for xAC. HTTP Status: 404
No data found for xAC
Processing ALB...
Failed to fetch data for ALB. HTTP Status: 404
No data found for ALB
Processing DZA...
Data for DZA: {'iso3': 'DZA', 'time_period': 2022, 'obs_value': 84.80409719593644, 'data_source': 'WHO/UNICEF Joint Monitoring Programme (2024)'}
Processing ASM...
Failed to fetch data for ASM. HTTP Status: 404
No data found for ASM
Processing AND...
Failed to fetch data for AND. HTTP Status: 404
No data found for AND
Processing AGO...
Data for AGO: {'iso3': 'AGO', 'time_per