# VIC Calibration

Original code by George K. Darkwah. Modified by Pritam Das.

Reference for clipping NetCDF files with a shapefile (rioxarray `rio.clip`), see: https://stackoverflow.com/questions/64580550/cut-netcdf-files-by-shapefile#:~:text=import%20xarray%20import%20geopandas%20from%20shapely.geometry%20import%20mapping,%3D%20nc_file.rio.clip%20%28sf.geometry.apply%20%28mapping%29%2C%20sf.crs%2C%20all_touched%20%3D%20True%29

In [4]:
import xarray as xr
import geopandas as gpd
import rasterio as rio
from rasterio.features import geometry_mask
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import rioxarray as rxr
import os
import hydrostats.metrics as hm
import shutil
from pathlib import Path
from ruamel_yaml import YAML
import altair as alt

import hvplot.pandas
import holoviews as hv

from rat.run_rat import run_rat

In [5]:
# Calibration points: one entry per dam, keyed by dam name. Each entry holds the path to the
# observed (in-situ) inflow csv and the path to the inflow csv produced by the calibration run.
# Both files are named after the dam, so the entries are generated from the list of dam names.
cal_points = {
    dam: {
        "obs_inflow_path": f"/water2/pdas47/2023_01_24-river-regulation/data/insitu/{dam}.csv",
        "cal_inflow_path": f"/water2/pdas47/2023_01_24-river-regulation/data-cumberland-calibration/tennessee/basins/cumberland/final_outputs/inflow/{dam}.csv",
    }
    for dam in (
        # "J._Percy_Priest_Dam",  # disabled: not part of the current analysis
        "Center_Hill_Dam",
        "Dale_Hollow_Dam",
        "Laurel_Dam",
    )
}

# directory holding the upstream sub-basin shapefiles
shapefile_dir = (
    "/water2/pdas47/2023_01_24-river-regulation"
    "/george-calibration/cumberland-upstream-subbasins"
)
# untouched reference copy of the VIC soil parameter file
reference_params = (
    "/water2/pdas47/2023_01_24-river-regulation/data-cumberland-calibration"
    "/tennessee/basins/cumberland/vic/vic_basin_params/vic_soil_param_reference.nc"
)
# soil parameter file that gets rewritten with the calibrated values
calib_params = (
    "/water2/pdas47/2023_01_24-river-regulation/data-cumberland-calibration"
    "/tennessee/basins/cumberland/vic/vic_basin_params/vic_soil_param.nc"
)
# rat configuration file used for the calibration runs
config_file_path = (
    "/water2/pdas47/2023_01_24-river-regulation"
    "/data-cumberland-calibration/cumberland-calibration.yaml"
)

In [16]:
# function to iterate through the features in the shapefile and set parameter values
def set_param_values(xds, shapefile, param, transform):
    """Set VIC parameter values on every grid cell covered by a feature of a shapefile.

    Each feature is rasterized onto the (lat, lon) grid of ``xds``; every variable named
    in ``param`` is then overwritten with the given value on the cells inside the feature.
    Cells outside all features keep their existing values.

    Args:
        xds (xarray.Dataset): dataset holding the parameters. Must have ``lat`` and ``lon``
            dimensions and contain one variable per key of ``param``. Modified in place.
        shapefile (GeoDataFrame): features whose footprint receives the new values.
        param (dict): Key = parameter (variable) name, value = parameter value. All entries
            are applied to every feature.
        transform: affine transform of the grid, passed straight to rasterio's
            ``geometry_mask``. TODO refactor to not be passed as an argument.
            NOTE(review): assumes the row/column layout described by ``transform`` matches
            the lat/lon ordering of ``xds`` -- confirm against the grid file.

    Returns:
        xarray.Dataset: the same ``xds`` object, with the parameters updated.

    Raises:
        KeyError: if a key of ``param`` is not a variable of ``xds``.
    """
    # Dataset.sizes is the name -> length mapping; using Dataset.dims as a mapping is deprecated.
    grid_shape = (xds.sizes["lat"], xds.sizes["lon"])

    for geometry in shapefile.geometry:
        # With invert=False the mask is True OUTSIDE the feature and False inside it.
        outside_feature = rio.features.geometry_mask(
            [geometry],
            out_shape=grid_shape,
            transform=transform,
            invert=False,
        )

        # where() keeps the existing value where the condition is True (outside the feature)
        # and writes `value` everywhere else (inside the feature).
        for name, value in param.items():
            xds[name] = xds[name].where(outside_feature, value)

    return xds

# helper: build the observed-vs-simulated inflow table and skill scores for one station
def _compare_station_inflow(point, start_date, end_date):
    """Read observed and simulated inflow of one calibration point and score the simulation.

    Args:
        point (str): key of the station in the module-level ``cal_points`` dictionary.
        start_date (str): first date (inclusive) of the comparison window.
        end_date (str): last date (inclusive) of the comparison window.

    Returns:
        tuple: ``(comparison_df, stats_df)``. ``comparison_df`` has the columns ``date``,
        ``obs_inflow``, ``cal_inflow`` and ``station``; ``stats_df`` is a one-row frame with
        the columns ``station``, ``nse``, ``nrmse``, ``kge``, ``pearson-r`` and ``r_squared``.
    """
    paths = cal_points[point]

    obs = pd.read_csv(paths["obs_inflow_path"], parse_dates=["date"]).rename(
        columns={"inflow": "obs_inflow"}
    )[["date", "obs_inflow"]]
    # scale by the number of seconds in a day; presumably converts m3/s to m3/d so that it
    # matches the simulated "inflow (m3/d)" column -- confirm the unit of the in-situ files
    obs["obs_inflow"] = obs["obs_inflow"] * (24 * 60 * 60)

    cal = pd.read_csv(paths["cal_inflow_path"], parse_dates=["date"])[["date", "inflow (m3/d)"]]
    cal = cal.rename(columns={"inflow (m3/d)": "cal_inflow"})

    # inner join: keep only the dates that are present in both series
    comparison_df = pd.merge(obs, cal, on="date", how="inner")
    comparison_df["station"] = point
    in_window = (comparison_df["date"] >= start_date) & (comparison_df["date"] <= end_date)
    comparison_df = comparison_df.loc[in_window, :]

    # hydrostats metrics take (simulated_array, observed_array), in that order. NSE, NRMSE and
    # KGE are not symmetric, so the simulated series has to be passed first.
    simulated = comparison_df["cal_inflow"]
    observed = comparison_df["obs_inflow"]
    stats_df = pd.DataFrame(
        {
            "station": [point],
            "nse": [hm.nse(simulated, observed)],
            "nrmse": [hm.nrmse_mean(simulated, observed)],
            "kge": [hm.kge_2012(simulated, observed)],
            "pearson-r": [hm.pearson_r(simulated, observed)],
            "r_squared": [hm.r_squared(simulated, observed)],
        }
    )
    return comparison_df, stats_df


# helper: create and return the next free "<NNN>_<YYYY_MM_DD>" directory inside results_dir
def _make_results_sub_dir(results_dir):
    """Create a new, uniquely numbered results directory for today and return its path.

    Args:
        results_dir (str or Path): parent directory; created if it does not exist.

    Returns:
        Path: the newly created sub-directory, named ``f"{i:03}_{YYYY_MM_DD}"`` with the
        smallest ``i`` (starting at 0) that was not already taken.
    """
    # imported here because `date` is not among the file-level imports
    from datetime import date

    stamp = date.today().strftime("%Y_%m_%d")
    run_index = 0
    while True:
        candidate = Path(results_dir) / f"{run_index:03}_{stamp}"
        try:
            # creating the directory is the existence test, so there is no gap between
            # checking and creating
            candidate.mkdir(parents=True)
        except FileExistsError:
            run_index += 1
        else:
            return candidate


# function to clear existing rat outputs, modify the parameter values, save it, run rat
def run_rat_with_params(params, config_file_path, results_dir, start_date="2015-01-01", end_date="2016-09-30"):
    """Run the rat model with the given soil parameters and save a comparison against observations.

    Steps: write ``params`` into a copy of the reference soil parameter file for every
    upstream sub-basin, delete the previous rat/vic outputs, run rat, compare the simulated
    inflow of every station in ``cal_points`` with the observed inflow, and save the results
    (comparison csv, statistics csv, interactive plot, params yaml) into a new numbered
    sub-directory of ``results_dir``.

    Args:
        params (dict): dictionary of parameters to set. Key = parameter name, value = parameter value.
        config_file_path (str): path to the rat config file.
        results_dir (str): directory in which the numbered results sub-directory is created.
        start_date (str): first date (inclusive) of the period used for the comparison.
        end_date (str): last date (inclusive) of the period used for the comparison.
    """
    # Open the basin grid tif file and extract the crs
    with rxr.open_rasterio(
        "/water2/pdas47/2023_01_24-river-regulation/data-cumberland-calibration/tennessee/basins/cumberland/basin_grid_data/cumberland_grid_mask.tif"
    ) as src:
        crs = src.rio.crs
        transform = src.rio.transform()

    # Open the netcdf file and set the crs, transform and spatial dimensions
    with xr.open_dataset(reference_params, decode_coords="all") as ds:
        xds = ds.rio.set_crs(crs)
        xds = xds.rio.write_crs(crs)
        xds = xds.rio.write_transform(transform, inplace=True)
        # the dataset is used after this block closes the file, so read the values now
        xds = xds.load()

    sf = gpd.read_file("/water2/pdas47/2023_01_24-river-regulation/george-calibration/cumberland-upstream-subbasins/upstream_subbasins.geojson")

    cal_ds = set_param_values(xds, sf, params, transform)

    # replace the soil parameter file used by the model run with the modified one
    if Path(calib_params).exists():
        os.remove(calib_params)

    cal_ds.to_netcdf(calib_params)

    # clear existing rat outputs
    basin_data_dir = Path('/water2/pdas47/2023_01_24-river-regulation/data-cumberland-calibration/tennessee/basins/cumberland')
    directories_to_be_deleted = [
        basin_data_dir / 'rat_outputs',
        basin_data_dir / 'final_outputs',
        basin_data_dir / 'vic' / 'vic_outputs'
    ]

    for directory in directories_to_be_deleted:
        if directory.exists():
            shutil.rmtree(directory)

    # run the rat model
    run_rat(config_file_path)

    # read in the rat outputs, read in observed data, combine them and score them
    comparison_dfs = []
    stats_dfs = []
    for point in cal_points:
        comparison_df, stats_df = _compare_station_inflow(point, start_date, end_date)
        comparison_dfs.append(comparison_df)
        stats_dfs.append(stats_df)

    combined_comparison_df = pd.concat(comparison_dfs)
    combined_stats_df = pd.concat(stats_dfs)

    # directory name is based on today's date; the leading number is incremented until free
    results_sub_dir = _make_results_sub_dir(results_dir)

    # save the comparison dataframe to csv
    combined_comparison_df.to_csv(results_sub_dir / "comparison_dataframes.csv", index=False)

    # save the statistics dataframe to csv
    combined_stats_df.to_csv(results_sub_dir / "statistics.csv", index=False)

    # plot the comparison dataframes. observed and calibrated inflows as lines, one view per station. interactive
    plot = combined_comparison_df.hvplot.line(
        x='date', y=['obs_inflow', 'cal_inflow'], groupby='station', width=800, height=400, shared_axes=False, grid=True, legend='top_left'
    )
    hv.save(plot, results_sub_dir / "comparison_plot.html")

    # save params as yaml file, so the run can be traced back to its parameter values
    yaml = YAML()
    yaml.dump(params, results_sub_dir / "params.yaml")


##############################################


# Parameter values for this calibration run. Key = name of the variable in the soil
# parameter file, value = value written to it inside the upstream sub-basins.
params = dict(Ws=1, infilt=0.1, Ds=0.005)

# Write the parameters, rerun rat and store the comparison with the observed inflow
# in a new numbered sub-directory of the results directory.
run_rat_with_params(
    params=params,
    config_file_path="/water2/pdas47/2023_01_24-river-regulation/data-cumberland-calibration/cumberland-calibration.yaml",
    results_dir="/water2/pdas47/2023_01_24-river-regulation/george-calibration/calib_results_try_2",
)