# Streamflow prediction model comparison

The objective of this notebook is to visually compare the streamflow prediction models with each other and with the observed and MGB-simulated streamflow.

In [None]:
from pathlib import Path
import hvplot.pandas
import pandas as pd
import xarray as xr

# Directory holding the CSV and NetCDF inputs read below; being a relative path,
# it is resolved against the current working directory.
DATA_PATH = Path('../../data')

#### Rainfall and streamflow comparison

In [None]:
def normalize(df):
    """Min-max scale ``df`` so that each column spans the range [0, 1].

    The minimum and maximum are taken over whatever rows ``df`` contains when
    it is passed in. A column whose minimum equals its maximum yields NaN
    (division by zero).
    """
    lowest = df.min()
    highest = df.max()
    return (df - lowest) / (highest - lowest)
def smooth(df, window, missing_val=0):
    """Return the rolling sum of ``df`` over ``window`` consecutive rows.

    Any NaN left in the result (for instance the leading rows that do not yet
    have a full window) is replaced by ``missing_val``.
    """
    rolling_total = df.rolling(window=window).sum()
    return rolling_total.fillna(missing_val)

# Time window shown in the rainfall/streamflow comparison plot.
start, end = "2019-01-01", "2020-12-31"

# Discharge columns and the P_cumul_7j column (presumably 7-day cumulative
# IMERG precipitation -- confirm) from the CSV, indexed by parsed timestamps.
flow = pd.read_csv(
    DATA_PATH / 'data_cumul.csv',
    sep=';',
    usecols=['time', 'débit_mgb', 'débit_insitu', "P_cumul_7j"],
    index_col='time',
    converters={"time": pd.to_datetime},
)
# NOTE(review): the scaling below uses the min/max of the whole file and the
# window is cut afterwards, whereas TAMSAT is scaled inside the window only --
# confirm that this difference is intended.
flow = normalize(flow).rename(
    columns={"débit_mgb": "mgb", "débit_insitu": "obs", "P_cumul_7j": "imerg"}
)
flow = flow[start:end]

# TAMSAT rainfall restricted to the window, summed over 7 consecutive rows
# (rows without a full window become 0), then min-max scaled.
tamsat = xr.load_dataset(DATA_PATH / "tamsat_sub4_senegal_daily_total.nc")
tamsat = tamsat.sel(time=slice(start, end)).to_dataframe()
tamsat = normalize(smooth(tamsat, 7))

# Column assignment aligns the TAMSAT series on flow's index.
flow["tamsat"] = tamsat["rfe"]

In [None]:
# Overlay the four scaled series on a shared time axis.
flow.hvplot.line(
    x="time",
    y=["mgb", "obs", "imerg", "tamsat"],
    grid=True,
    title="Rainfall and streamflow comparison",
)

#### Observed, simulated and predicted streamflow comparisons

In [None]:

# MGB discharge from the CSV, converted to an xarray Dataset with a `time`
# index and a single `mgb` variable.
mgb = (
    pd.read_csv(
        DATA_PATH / 'data_cumul.csv',
        sep=';',
        usecols=['time', 'débit_mgb'],
        index_col='time',
        converters={"time": pd.to_datetime},
    )
    .to_xarray()
    .rename({"débit_mgb": "mgb"})
)

# Outputs of the two TAMSAT-based models (presumably a regression benchmark
# and an MLP, judging by the file names -- confirm).
tamsat_reg = xr.open_dataset(DATA_PATH / 'tamsat_regression_benchmark.nc')
tamsat_mlp = xr.open_dataset(DATA_PATH / 'mlp_with_tamsat.nc')

# Combine both model datasets and attach the MGB series; the cells below read
# `pred`, `obs`, `mgb` and `scores` from the result.
data = xr.merge([tamsat_reg, tamsat_mlp])
data["mgb"] = mgb["mgb"]


In [None]:
def plot_model_comparison(data, forecast_horizon=10, width=1000, height=600):
    """Draw model predictions, observations and MGB data as lines for one forecast horizon.

    Parameters
    ----------
    data
        Dataset providing the ``pred``, ``obs`` and ``mgb`` variables, a
        ``forecast_horizon`` coordinate with labels of the form ``"t+<n>"``,
        a ``time`` dimension, and a ``model`` dimension on ``pred``.
    forecast_horizon
        Value inserted into the ``"t+<n>"`` label to select; also shown in the
        plot title as a number of days.
    width, height
        Plot size forwarded to hvplot.

    Returns
    -------
    The hvplot line plot built from ``obs``, ``mgb`` and one series per model.
    """
    horizon_label = f"t+{forecast_horizon}"
    selected = data.sel(forecast_horizon=horizon_label)[["pred", "obs", "mgb"]]

    # Keep only the time steps for which none of the variables is missing.
    complete = selected.dropna(dim="time")

    # Turn the `model` dimension of the predictions into one variable per model.
    per_model = complete["pred"].to_dataset(dim="model")
    combined = xr.merge([complete[["obs", "mgb"]], per_model])

    # Drop dimensionless coordinates (such as the selected horizon) so they
    # are not carried into the dataframe.
    scalar_coords = [name for name in combined.coords if not combined[name].dims]
    frame = combined.drop_vars(scalar_coords).to_dataframe()

    return frame.hvplot.line(
        width=width,
        height=height,
        grid=True,
        title=f"Forecast Horizon: {forecast_horizon} days",
        legend='top',
    )

In [None]:
# Compare all models at the 10-day horizon (the function's default).
plot_model_comparison(data, forecast_horizon=10)

In [None]:
# Scores stored for the SimpleRegularizedMLP model at the t+10 horizon,
# displayed as a pandas Series.
(
    data.sel(forecast_horizon="t+10", model="SimpleRegularizedMLP")["scores"]
    .to_dataframe()["scores"]
)