In [1]:
import requests
from pathlib import Path
import logging

import pandas as pd

# Configure the root logger so INFO-level records (cache hits, fetch progress)
# are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NOTE(review): the project root is taken to be two directory levels above the
# kernel's current working directory, so every path below depends on where the
# notebook is launched from — confirm this matches the repository layout.
PROJECT_ROOT = Path.cwd().parents[1]
# Directory holding the cached HICAT CSV files read and written further down.
DATA_PATH = PROJECT_ROOT / "data"

In [None]:
# Code -> human-readable label lookups for the CCKP download API.
# Based on https://climateknowledgeportal.worldbank.org/download-data#htab-1499
#
# The first four sections map a code straight to its label. "products" maps a
# code to a {"label", "description"} record; it is assembled from a flat table
# of (code, label, description) rows so each product reads as a single entry.
data_dictionary = {
    "scenario": {
        "historical": "Historical",
        "ssp119": "SSP1-1.9",
        "ssp126": "SSP1-2.6",
        "ssp245": "SSP2-4.5",
        "ssp370": "SSP3-7.0",
        "ssp585": "SSP5-8.5",
    },
    "collection": {
        "chaz-x0.5": "Columbia HAZard Model (CHAZ, Lee et al. 2018)",
        "cmip6-x0.25": "CMIP6 0.25-degree",
        "cru-x0.5": "CRU 0.5-degree",
        "era5-x0.25": "ERA5 0.25-degree",
        "pop-x1": "Population and Poverty",
    },
    "aggregation": {
        "annual": "Annual",
        "monthly": "Monthly",
        "period": "Period",
        "seasonal": "Seasonal",
    },
    "percentiles": {
        "mean": "Mean",
        "median": "Median or 50th Percentile of the Multi-Model Ensemble",
        "p10": "10th Percentile of the Multi-Model Ensemble",
        "p90": "90th Percentile of the Multi-Model Ensemble",
    },
    "products": {
        code: {"label": label, "description": description}
        for code, label, description in (
            (
                "heatplot",
                "Heatplot",
                "The heatplot shows seasonal either means or anomalies across longer-term time horizons. Monthly data is averaged across each ten-year period for the aggregation period.",
            ),
            (
                "hicat",
                "Heat Category (HICAT)",
                "Heat Category (HICAT) is a classification of heat extremes based on the projected temperature anomalies.",
            ),
            (
                "tcfraction",
                "Tropical Cyclone Fraction",
                "Percentage of tropical cyclones in each category relative to the total number of tropical cyclones.",
            ),
            (
                "counts",
                "Storm Count",
                "Number of tropical cyclones per year in the selected cyclone category.",
            ),
            (
                "returnperiod",
                "Return Period",
                "Return period of cyclones of at least the selected category.",
            ),
            (
                "trend",
                "Trend",
                "The detection, estimation and prediction of trends and associated statistical and physical significance are important aspects of understanding climate and changes in climate. Trend is the rate at which change occurs over a time period. The trend may be linear or non-linear.",
            ),
            (
                "anomaly",
                "Anomaly",
                "A departure from the reference value. Using temperature as an example, a positive anomaly indicates that the projected temperature was warmer than the reference value, while a negative anomaly indicates that the projected temperature was cooler than the reference value.",
            ),
            (
                "agepyramid",
                "Age Pyramid",
                "A graphical illustration of the distribution of a population of a country by age groups.",
            ),
            (
                "timeseries",
                "Time Series",
                "A sequence of data points, typically annual, which occur in successive order over a designated time horizon.",
            ),
            (
                "yearofchange",
                "Year of Change",
                "Year of Change represents the statistically significant departure of the selected variable from the historical natural variability bounds due to the emergence of an anthropogenically forced trend.",
            ),
            (
                "percentchangecounts",
                "Percent Change in Storm Counts",
                "Anomaly in annual exceedance probability (or return period) expressed as the fractional change between historical and future scenarios.",
            ),
            (
                "faep",
                "Fractional Change in Annual Exceedance Probability",
                "Anomaly in annual exceedance probability calculated as fractional change (future/historical). Values less than 1 indicate decreased probability or frequency (increased return period), while values greater than 1 indicate increased probability or frequency (decreased return period).",
            ),
            (
                "fcounts",
                "Fractional Change in Storm Counts",
                "Fractional change in the number of tropical cyclones per year for the selected cyclone category between historical and future scenarios.",
            ),
            (
                "freturnperiod",
                "Fractional Change in Return Period",
                "Anomaly in return period expressed as the fractional change between historical and future scenarios.",
            ),
            (
                "climatology",
                "Climatology",
                "The calculation of uniform periods, typically 20, 30, 50-years, consisting of annual, seasonal, and monthly, averages of temperature, precipitation, and other climatological variables.",
            ),
        )
    },
}


In [28]:
# Three-letter country code -> display name for the countries covered here.
# The codes are what fetch_hicat_data passes as the API geocode when it
# downloads national-level data one country at a time.
mena_countries = dict(
    AFG="Afghanistan",
    DZA="Algeria",
    BHR="Bahrain",
    DJI="Djibouti",
    EGY="Egypt",
    IRN="Iran",
    IRQ="Iraq",
    JOR="Jordan",
    KWT="Kuwait",
    LBN="Lebanon",
    LBY="Libya",
    MAR="Morocco",
    OMN="Oman",
    QAT="Qatar",
    SAU="Saudi Arabia",
    SYR="Syria",
    TUN="Tunisia",
    ARE="United Arab Emirates",
    PSE="West Bank and Gaza",
    YEM="Yemen",
)

In [None]:
def fetch_climate_data(
    geocode: str,
    collection: str = "cmip6-x0.25",
    type: str = "timeseries",
    variable: str = "spei12",
    product: str = "timeseries",
    aggregation: str = "annual",
    period: str = "2015-2100",
    percentile: str = "median",
    scenario: str = "ssp119",
    model: str = "ensemble",
    model_calculation: str = "all",
    statistic: str = "mean",
    timeout: float = 60.0,
) -> dict:
    """Fetch climate data from World Bank Climate Change Knowledge Portal API.

    The endpoint name is built by joining ``collection``, ``type``,
    ``variable``, ``product``, ``aggregation``, ``period``, ``percentile``,
    ``scenario``, ``model``, ``model_calculation`` and ``statistic`` with
    underscores (in that order); ``geocode`` is appended as the final path
    segment and the response is requested with ``_format=json``.

    Args:
        geocode: Last path segment of the request URL (the notebook passes
            country codes such as ``"DJI"`` or the region ``"region_mea"``).
        collection, type, variable, product, aggregation, period, percentile,
        scenario, model, model_calculation, statistic: Components of the
            endpoint name, joined with ``"_"``. ``type`` shadows the builtin
            but is kept because callers pass it by keyword.
        timeout: Seconds to wait for the server before giving up, forwarded
            to ``requests.get``. Without it a stalled connection would block
            the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
        ValueError: If the response body is not valid JSON.
    """
    base_url = "https://cckpapi.worldbank.org/cckp/v1/"
    endpoint_name = "_".join(
        [
            collection,
            type,
            variable,
            product,
            aggregation,
            period,
            percentile,
            scenario,
            model,
            model_calculation,
            statistic,
        ]
    )
    url = f"{base_url}{endpoint_name}/{geocode}"

    response = requests.get(url, params={"_format": "json"}, timeout=timeout)
    # Fail here with a clear HTTP error instead of letting an error payload
    # reach callers, where it would surface as a KeyError or a JSON decode error.
    response.raise_for_status()
    return response.json()


def fetch_hicat_data(is_subnational: bool = True) -> pd.DataFrame:
    """Fetch Heat Category (HICAT) data for MENA countries, with a CSV cache.

    If the cache file for the requested mode already exists under
    ``DATA_PATH`` it is read and returned without touching the network.
    Otherwise the data is downloaded with ``fetch_climate_data``, reshaped
    to long format, written to the cache file and returned.

    Args:
        is_subnational: When True, issue a single request for the
            ``"region_mea"`` geocode and cache to
            ``hicat_data_subnational.csv``. When False, issue one request
            per entry in ``mena_countries`` and cache to ``hicat_data.csv``.

    Returns:
        A long-format frame with columns ``year``, ``country`` and
        ``heat_category`` followed by one constant column per request
        parameter. ``year`` holds the index labels of the downloaded data
        (``YYYY-MM`` strings in the cached files shown in this notebook).
        The subnational frame additionally carries a ``geocode`` column.
    """
    hicat_params = {
        "collection": "cmip6-x0.25",
        "type": "heatplot",
        "variable": "hicat",
        "product": "heatplot",
        "aggregation": "monthly",
        "period": "1951-2100",
        "percentile": "median",
        "scenario": "ssp119",
        "model": "ensemble",
        "model_calculation": "all",
        "statistic": "mean",
    }

    # Resolve the cache location once; both modes share the same
    # load-if-present logic and differ only in the file name.
    cache_name = "hicat_data_subnational.csv" if is_subnational else "hicat_data.csv"
    hicat_file = DATA_PATH / cache_name
    if hicat_file.exists():
        logger.info(f"Loading HICAT data from {hicat_file}")
        return pd.read_csv(hicat_file)

    if is_subnational:
        logger.info("Fetching HICAT subnational data for MENA region")
        # geocode is stored in hicat_params so it is also stamped onto the
        # output rows by .assign() below.
        hicat_params["geocode"] = "region_mea"
        hicat_df = pd.DataFrame(fetch_climate_data(**hicat_params)["data"])
    else:
        hicat_dfs = []
        for country_code, country_name in mena_countries.items():
            logger.info(f"Fetching HICAT data for {country_name} ({country_code})")
            hicat = fetch_climate_data(geocode=country_code, **hicat_params)
            hicat_dfs.append(pd.DataFrame(hicat["data"]))
        # Each per-country frame contributes its own column(s); align them
        # side by side on the shared index.
        hicat_df = pd.concat(hicat_dfs, axis=1)

    # Wide (one column per country) -> long (one row per index label/country),
    # then record the request parameters as constant columns.
    hicat_df = (
        hicat_df.reset_index()
        .rename(columns={"index": "year"})
        .melt(id_vars=["year"], var_name="country", value_name="heat_category")
        .assign(**hicat_params)
    )

    # Create the data directory on first use; otherwise to_csv raises OSError
    # and the freshly downloaded data is lost.
    hicat_file.parent.mkdir(parents=True, exist_ok=True)
    hicat_df.to_csv(hicat_file, index=False)
    logger.info(f"Saved HICAT data to {hicat_file}")
    return hicat_df

INFO:__main__:Loading HICAT data from /Users/farhanreynaldo/Documents/world-bank/git-repo/MENA-FCV-economic-monitor/data/hicat_data.csv


In [59]:
# Load both HICAT datasets through the caching loader instead of reading the
# CSVs directly: when the cache files exist this returns exactly what
# pd.read_csv would, and on a fresh checkout it downloads and creates them
# rather than failing with FileNotFoundError.
hicat_sub = fetch_hicat_data(is_subnational=True)
hicat_nat = fetch_hicat_data(is_subnational=False)

In [61]:
# Show the Djibouti (DJI) rows of the regional ("region_mea") HICAT frame —
# presumably for comparison with the national-level rows displayed below.
hicat_sub.query("country == 'DJI'")

Unnamed: 0,year,country,heat_category,collection,type,variable,product,aggregation,period,percentile,scenario,model,model_calculation,statistic,geocode
3780,1951-01,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean,region_mea
3781,1951-02,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean,region_mea
3782,1951-03,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean,region_mea
3783,1951-04,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean,region_mea
3784,1951-05,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean,region_mea
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3955,2091-08,DJI,4,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean,region_mea
3956,2091-09,DJI,4,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean,region_mea
3957,2091-10,DJI,4,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean,region_mea
3958,2091-11,DJI,3,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean,region_mea


In [63]:
# Show the Djibouti (DJI) rows of the per-country HICAT frame — presumably
# the counterpart to the regional rows displayed above.
hicat_nat.query("country == 'DJI'")

Unnamed: 0,year,country,heat_category,collection,type,variable,product,aggregation,period,percentile,scenario,model,model_calculation,statistic
540,1951-01,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean
541,1951-02,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean
542,1951-03,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean
543,1951-04,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean
544,1951-05,DJI,0,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,2091-08,DJI,4,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean
716,2091-09,DJI,4,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean
717,2091-10,DJI,4,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean
718,2091-11,DJI,3,cmip6-x0.25,heatplot,hicat,heatplot,monthly,1951-2100,median,ssp119,ensemble,all,mean
