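This notebook calculates, for each gauging station, the GloFAS thresholds that are equivalent to the observed flood thresholds. It uses GloFAS reanalysis data from 1979 and a quantile-mapping approach: each observed threshold is converted to its percentile rank within the observed series and then mapped to the value at the same percentile of the reanalysis series.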

In [21]:
import numpy as np
import pandas as pd
from scipy import stats
import os

In [22]:
# base directory that holds the per-country folders, and the country to process
directory = '/s3/scratch/jamie.towner/flood_aa'
country = 'mozambique'

In [23]:
# file names of the three input tables
observed_data_file = "observations_complete_series.csv"
reanalysis_data_file = "glofas_reanalysis_moz_complete.csv"
station_info_file = "metadata_observations.csv"

# folders holding the inputs, all located under <directory>/<country>
country_root = os.path.join(directory, country)
metadata_directory = os.path.join(country_root, "data/metadata")
observed_data_directory = os.path.join(
    country_root, "data/observations/gauging_stations/all_stations"
)
reanalysis_data_directory = os.path.join(
    country_root, "data/forecasts/glofas_reanalysis/all_stations"
)

# full path of each input file
observed_data_path = os.path.join(observed_data_directory, observed_data_file)
reanalysis_data_path = os.path.join(reanalysis_data_directory, reanalysis_data_file)
station_info_path = os.path.join(metadata_directory, station_info_file)

# read the observed series, the reanalysis series and the station metadata
observed_data = pd.read_csv(observed_data_path)
reanalysis_data = pd.read_csv(reanalysis_data_path)
station_info = pd.read_csv(station_info_path)

In [24]:
# parse the "date" column of both tables into datetimes;
# format='mixed' lets pandas infer the format of each entry individually
for table in (observed_data, reanalysis_data):
    table["date"] = pd.to_datetime(table["date"], format='mixed')

In [25]:
# Quantile mapping of observed thresholds onto the reanalysis series.
#
# For every station listed in the metadata, each observed threshold is
# converted to its percentile rank within the station's observed series and
# then mapped to the value found at that same percentile of the station's
# reanalysis series. One result row is produced per (station, threshold).
#
# Notes:
# - Ranks and interpolation are computed on the raw values. Standardizing
#   both series (z-scores) before ranking and un-standardizing afterwards is
#   a monotone affine transform, so it yields the same mapping while adding a
#   division by zero for constant series and a risk of rounding distinct
#   values into ties.
# - When a threshold is missing (NaN) or a station has no observed or no
#   reanalysis values, the row is kept but its percentile and mapped value
#   are NaN. A plain max(0, min(p, 100)) clamp would turn a NaN percentile
#   into 0 and report the reanalysis minimum as if it were a real threshold.

# initialize list to store results
results = []

# loop over each station and threshold in metadata
for _, row in station_info.iterrows():
    station = row['station name']

    # get observed and reanalysis data for the station (missing values removed)
    data_observed = observed_data[station].dropna().values
    data_reanalysis = reanalysis_data[station].dropna().values

    # empirical quantile function of the reanalysis series: the sorted values
    # placed on an evenly spaced 0-100 percentile axis. Both arrays depend
    # only on the station, so they are built once rather than per threshold.
    sorted_reanalysis = np.sort(data_reanalysis)
    percentiles = np.linspace(0, 100, len(sorted_reanalysis))

    # a mapping is only possible when both series contain at least one value
    station_has_data = len(data_observed) > 0 and len(sorted_reanalysis) > 0

    # define thresholds to loop over
    thresholds = {
        'obs_bankfull': row['obs_bankfull'],
        'obs_moderate': row['obs_moderate'],
        'obs_severe': row['obs_severe']
    }

    # loop over each threshold
    for threshold_name, threshold_value in thresholds.items():
        if station_has_data and not pd.isna(threshold_value):
            # percentile rank of the threshold in the observed data;
            # percentileofscore always returns a value within [0, 100]
            percentile_rank_observed = stats.percentileofscore(data_observed, threshold_value)

            # value at the same percentile of the reanalysis data
            value_reanalysis = np.interp(percentile_rank_observed, percentiles, sorted_reanalysis)
        else:
            # nothing to map: propagate the gap instead of inventing a value
            percentile_rank_observed = np.nan
            value_reanalysis = np.nan

        # store results
        results.append({
            'station': station,
            'threshold_name': threshold_name,
            'threshold_value': threshold_value,
            'percentile_rank_observed': percentile_rank_observed,
            'value_reanalysis': value_reanalysis
        })

# convert results to a dataframe
results_df = pd.DataFrame(results)

In [26]:
# display the table of mapped thresholds (one row per station and threshold)
results_df

Unnamed: 0,station,threshold_name,threshold_value,percentile_rank_observed,value_reanalysis
0,goonda,obs_bankfull,6.0,99.463209,1937.250414
1,goonda,obs_moderate,7.2,99.768767,2619.685637
2,goonda,obs_severe,8.44,99.917417,3686.643762
3,dombe,obs_bankfull,6.0,97.089212,373.941263
4,dombe,obs_moderate,9.37,99.900011,1188.068443
5,dombe,obs_severe,9.79,99.933341,1488.293314
6,espungabera,obs_bankfull,3.7,99.83789,265.205595
7,espungabera,obs_moderate,3.8,99.88112,299.391494
8,espungabera,obs_severe,4.21,99.956771,466.700751
9,revue,obs_bankfull,4.5,100.0,818.8594
