# Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import xarray

In [4]:
from backend import loading_utils
from backend import data_paths
from backend import gauge_groups_utils
from backend import metrics_utils
from backend import evaluation_utils

In [5]:
# Forwarded as `restart=` to every metrics calculation in this notebook.
# NOTE(review): presumably True recomputes metrics from scratch instead of
# resuming from previously saved per-gauge results — confirm in metrics_utils.
RESTART = True

# Metadata

## Gauge Groups

In [6]:
# Gauge identifiers shared by every data-loading and metrics call below.
gauges = gauge_groups_utils.get_full_gauge_group()
print(f'There are {len(gauges)} gauges.')

There are 5678 gauges.


In [7]:
# Debug only: uncomment to run the notebook on a 50-gauge subset.
# gauges = gauges[50:100]

# Google

In [8]:
# Experiments evaluated in the Google section.
# NOTE: `_EXPERIMENTS` is rebound in the GloFAS section further down, so
# re-running Google cells after that point would pick up the GloFAS value.
_EXPERIMENTS = data_paths.EXPERIMENTS

## Load Data: Google Model Runs

In [9]:
# Load the model runs of every experiment for the selected gauges; the result
# is indexed by experiment name in the cells below.
# NOTE(review): the saved output of this cell ends in
# `ValueError: must supply at least one object to concatenate` while working on
# `kfold_splits` — presumably no model-run data was found for that experiment.
# Verify the data paths before relying on any downstream cell.
google_model_runs = loading_utils.load_all_experimental_model_runs(
  gauges=gauges,
)

Working on experiment: kfold_splits


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5678/5678 [00:00<00:00, 8989.53it/s]


ValueError: must supply at least one object to concatenate

In [None]:
# Count, per experiment, how many of the requested gauges are absent from the
# loaded model runs.
expected_gauges = set(gauges)  # Loop-invariant, so build it once.
for experiment in google_model_runs:
    missing_gauges = expected_gauges - set(google_model_runs[experiment].gauge_id.values)
    print(f'There are {len(missing_gauges)} (out of {len(gauges)}) missing gauges in experiment {experiment}.')

## Load Data: GRDC Observation Data

In [None]:
# Load GRDC observation data.
grdc_observation_data = loading_utils.load_grdc_data()

# Merge the observations into each experiment's model-run xarray so that
# simulated and observed values sit in one dataset per experiment.
# `grdc_observation_data` is not deleted here: the GloFAS section below merges
# it again.
for experiment in google_model_runs.keys():
    google_model_runs[experiment] = xarray.merge(
        [google_model_runs[experiment], grdc_observation_data])

## Metrics: 2014 - Present

In [None]:
# Every gauge gets the same two-date window; this dict is also reused by the
# GloFAS 2014 metrics cell further down.
# NOTE(review): presumably [start, end] dates for evaluation — confirm whether
# the end date is inclusive.
google_validation_time_periods = {
    gauge: ['2014-01-01', '2023-01-01'] for gauge in gauges
}

# Unlike the other metrics calls in this notebook, no `lead_times` argument is
# passed here.
# NOTE(review): presumably the default covers all lead times — confirm in
# metrics_utils.
metrics = metrics_utils.calculate_and_save_metrics_for_many_gagues_and_many_models(
    restart=RESTART,
    experiments=_EXPERIMENTS,
    ds=google_model_runs,
    gauges=gauges,
    sim_variable=metrics_utils.GOOGLE_VARIABLE,
    obs_variable=metrics_utils.OBS_VARIABLE,
    base_path=data_paths.PER_METRIC_GOOGLE_2014_HYDROGRAPH_METRICS_DIR,
    breakpoints_path=data_paths.PER_GAUGE_GOOGLE_2014_HYDROGRAPH_METRICS_DIR,
    time_periods=google_validation_time_periods,
)
# Summarize NSE for the first experiment; -inf values are mapped to NaN first
# so they do not distort the summary statistics.
metrics[_EXPERIMENTS[0]]['NSE'].replace(-np.inf, np.nan).describe()

## Metrics: 1980 - Present

In [None]:
# Same calculation as the 2014 cell, but without a `time_periods` restriction,
# written to the 1980 output directories, and limited to lead time 0.
# NOTE: this rebinds `metrics`, replacing the 2014 results held in memory.
metrics = metrics_utils.calculate_and_save_metrics_for_many_gagues_and_many_models(
    restart=RESTART,
    experiments=_EXPERIMENTS,
    ds=google_model_runs,
    gauges=gauges,
    sim_variable=metrics_utils.GOOGLE_VARIABLE,
    obs_variable=metrics_utils.OBS_VARIABLE,
    base_path=data_paths.PER_METRIC_GOOGLE_1980_HYDROGRAPH_METRICS_DIR,
    breakpoints_path=data_paths.PER_GAUGE_GOOGLE_1980_HYDROGRAPH_METRICS_DIR,
    lead_times=[0]
)
# Summarize NSE for the first experiment; -inf values are mapped to NaN first
# so they do not distort the summary statistics.
metrics[_EXPERIMENTS[0]]['NSE'].replace(-np.inf, np.nan).describe()

## Delete Variables to Clear Memory

In [None]:
# Drop the Google model runs (with their merged observations).
# `grdc_observation_data` stays in scope because the GloFAS section merges it.
del google_model_runs

# GloFAS

In [None]:
# Rebinds `_EXPERIMENTS` (previously the Google experiment list) to a single
# entry, so the GloFAS cells below can reuse the same metrics call pattern.
_EXPERIMENTS = [metrics_utils.GLOFAS_VARIABLE]

## Load Data: GloFAS

In [None]:
# Load GloFAS model runs for the same gauge list used in the Google section.
glofas_model_runs = loading_utils.load_glofas_model_runs(gauges=gauges)

In [None]:
# Count requested gauges that are absent from the GloFAS runs.
# A set difference (the same approach as the Google section) is used instead of
# subtracting lengths, so duplicate or unexpected gauge IDs cannot skew the
# count. `missing_gauges` remains an integer.
missing_gauges = len(set(gauges) - set(glofas_model_runs.gauge_id.values))
print(f'There are {missing_gauges} (out of {len(gauges)}) missing gauges in glofas runs.')

In [None]:
# Merge the GloFAS runs and the GRDC observations into one large xarray.
# Only the lead_time=0 slice of the observations is merged.
# `grdc_observation_data` comes from the GRDC loading cell in the Google
# section, so that cell must have run first.
# This xarray merge takes ... forever ...
glofas_model_runs = xarray.merge(
    [glofas_model_runs, grdc_observation_data.sel(lead_time=0)])

## Metrics: 2014 - Present

In [None]:
# GloFAS metrics at lead time 0, using the same per-gauge time periods as the
# Google 2014 metrics (`google_validation_time_periods` is defined in the Google
# section). The single merged GloFAS dataset is passed once per entry in
# `_EXPERIMENTS`.
metrics = metrics_utils.calculate_and_save_metrics_for_many_gagues_and_many_models(
    restart=RESTART,
    experiments=_EXPERIMENTS,
    ds={exp: glofas_model_runs for exp in _EXPERIMENTS},
    gauges=gauges,
    sim_variable=metrics_utils.GLOFAS_VARIABLE,
    obs_variable=metrics_utils.UNNORMALIZED_OBS_VARIABLE,
    base_path=data_paths.PER_METRIC_GLOFAS_2014_HYDROGRAPH_METRICS_DIR,
    breakpoints_path=data_paths.PER_GAUGE_GLOFAS_2014_HYDROGRAPH_METRICS_DIR,
    time_periods=google_validation_time_periods,
    lead_times=[0]
)
# Map -inf NSE values to NaN before summarizing, as the Google metrics cells
# do; otherwise a single -inf makes the mean -inf and the std NaN.
metrics[_EXPERIMENTS[0]]['NSE'].replace(-np.inf, np.nan).describe()

## Metrics: 1980 - Present

In [None]:
# GloFAS metrics at lead time 0 without a `time_periods` restriction, written
# to the 1980 output directories. The single merged GloFAS dataset is passed
# once per entry in `_EXPERIMENTS`.
metrics = metrics_utils.calculate_and_save_metrics_for_many_gagues_and_many_models(
    restart=RESTART,
    experiments=_EXPERIMENTS,
    ds={exp: glofas_model_runs for exp in _EXPERIMENTS},
    gauges=gauges,
    sim_variable=metrics_utils.GLOFAS_VARIABLE,
    obs_variable=metrics_utils.UNNORMALIZED_OBS_VARIABLE,
    base_path=data_paths.PER_METRIC_GLOFAS_1980_HYDROGRAPH_METRICS_DIR,
    breakpoints_path=data_paths.PER_GAUGE_GLOFAS_1980_HYDROGRAPH_METRICS_DIR,
    lead_times=[0]
)
# Map -inf NSE values to NaN before summarizing, as the Google metrics cells
# do; otherwise a single -inf makes the mean -inf and the std NaN.
metrics[_EXPERIMENTS[0]]['NSE'].replace(-np.inf, np.nan).describe()

# Plot Hydrograph Metric CDFs

In [None]:
# Plot hydrograph-metric CDFs from the saved 2014 GloFAS and Google metrics
# directories for lead times 0 through 7, then save the figure.
# NOTE(review): the GloFAS 2014 metrics above are computed with
# `lead_times=[0]` only — confirm how the plot handles GloFAS at lead times > 0.
evaluation_utils.hydrograph_metrics_cdf_plots(
    glofas_basepath=data_paths.PER_METRIC_GLOFAS_2014_HYDROGRAPH_METRICS_DIR,
    google_basepath=data_paths.PER_METRIC_GOOGLE_2014_HYDROGRAPH_METRICS_DIR,
    title='2014-2022 with Lead Times',
    lead_times=list(range(8))
)
evaluation_utils.save_figure(data_paths.HYDROGRAPH_METRICS_WITH_LEAD_TIMES_CDFS_FILENAME)

In [None]:
# Plot hydrograph-metric CDFs from the saved 1980 GloFAS and Google metrics
# directories at lead time 0, then save the figure.
# NOTE(review): the title says 1984 while the section headers and data paths
# say 1980 — confirm which start year is correct.
evaluation_utils.hydrograph_metrics_cdf_plots(
    glofas_basepath=data_paths.PER_METRIC_GLOFAS_1980_HYDROGRAPH_METRICS_DIR,
    google_basepath=data_paths.PER_METRIC_GOOGLE_1980_HYDROGRAPH_METRICS_DIR,
    title='1984-2022 Ungauged Cross Validation Splits',
    lead_times=[0]
)
evaluation_utils.save_figure(data_paths.HYDROGRAPH_METRICS_GLOFAS_UNGAUGED_CDFS_FILENAME)