![image](https://www.ewatercycle.org/img/logo.png)

# MARRMoT-m01 - a test notebook for running the MARRMoT-m01 model with known parameters (i.e. already calibrated)
This notebook runs the MARRMoT-m01 model for a single catchment and a single forcing dataset. The output cannot be trusted AT ALL because this test run is only meant to illustrate the workflow.

Specific settings for this test run include the catchment, the corresponding station ID, the forcing dataset, a calibrated parameter, and the period over which to spin up and run the model (a short test period instead of the full period).

## Import statements
We'll be using the following modules:

In [None]:
from os import environ, remove
import os
import time
from datetime import datetime
from pathlib import Path

import hydrostats.metrics as hm
import hydrostats.visual as hv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
import xarray as xr
from cartopy.io import shapereader
from cftime import num2date
from ewatercycle.observation.grdc import get_grdc_data
from grpc4bmi.bmi_client_docker import BmiClientDocker
from scipy import optimize
from tqdm import tqdm

## General settings
The cell below defines all the settings for this example, including locations of files and folders needed. 

TODO: rename ALL CAPS to normal variables: no global variables please.
TODO: make everything camelCase, or under_score style, but not mixed.

In [None]:
# Root directories: the project home (where output is written) and the
# shared example data
PROJECT_HOME = Path(os.path.abspath("/mnt/home/user42"))
DATA_HOME = Path(os.path.abspath("/mnt/data/examples/technical_paper"))

# Parent folder in which this notebook creates a timestamped temp dir
TEMP_LOC = PROJECT_HOME.joinpath("temp")

# Directory holding the forcing data, and the forcing file used in this run
FORCING_DIR = DATA_HOME.joinpath("marrmot", "marrmotForcing", "validation")
forcing_file = "marrmot_ERA5_Moselle_2001_2016.mat"

# These two folders will be known inside the model container as
# /data/input and /data/output
containerIn = PROJECT_HOME.joinpath("inputMarrmot")
containerOut = PROJECT_HOME.joinpath("outputMarrmot")

# Name of the file, created in 'containerIn', that holds the configuration
# settings for the model run
config_file_name = "config.mat"

In [None]:
# Test period for both validation and calibration, given as
# datetime(year, month, day) values
period = dict(
    spinup=datetime(2001, 1, 1),
    start=datetime(2001, 2, 1),
    end=datetime(2001, 12, 31),
)

In [None]:
# Name of the model output variable we want to retrieve
variable = "flux_out_Q"

# Conversion factor for flux_out_Q from mm/day to m3/s:
# 1000 mm per m and 24 * 60 * 60 seconds per day
conversion = 1 / (1000 * 24 * 60 * 60)

# Model parameters; here the maximum soil moisture storage (in mm), which
# ranges between 100 and 2000, see
# https://github.com/wknoben/MARRMoT/blob/dev-docker-BMI/MARRMoT/Models/Parameter%20range%20files
parameters = [1500.0]

In [None]:
# Catchment under study and the GRDC station used for the observations.
# NOTE: this station ID is on the RHINE; the Moselle is not available.
catchment = "Moselle"
station_id = "6335020"

# Location of the GRDC data on jupyter.ewatercycle.org
environ["GRDC_DATA_HOME"] = "/mnt/data/grdc/dailies"


## Helper functions and working directories

In [None]:
# Create a timestamped working directory inside the main temp location
# and show where it is
TEMP_DIR = TEMP_LOC.joinpath("marrmot-m01_" + time.strftime("%Y%m%d_%H%M%S"))
TEMP_DIR.mkdir(parents=True, exist_ok=True)
print(TEMP_DIR)

In [None]:
# TODO: give this helper a MARRMoT-specific name.
# It takes the forcing and config file locations as arguments instead of
# constructing them, and uses passed variables instead of globals.

def write_marrmot_config(parameters, catchment, period, forcing_file_loc, config_file_loc, model_name="m_01_collie1_1p_1s", solver=None, store_ini=float(5)):
    """Write a MARRMoT model configuration file.

    Loads the forcing ``.mat`` file, overwrites the year/month/day of its
    ``time_end`` entry with ``period["end"]``, adds the model settings and
    saves the result as a new ``.mat`` file. Only ``time_end`` is changed;
    the start time stored in the forcing file is left untouched.

    Args:
        parameters: Model parameter values, stored under ``parameters``.
        catchment: Name of the catchment. Currently unused; kept so that
            existing callers keep working.
        period: Mapping with at least an ``"end"`` entry exposing ``year``,
            ``month`` and ``day`` (e.g. a ``datetime``).
        forcing_file_loc: Location of the forcing ``.mat`` file to read.
        config_file_loc: Location the configuration file is written to.
        model_name: Model identifier stored under ``model_name``.
        solver: Solver settings stored under ``solver``. Defaults to the
            implicit Euler approximation with a residual-norm tolerance of
            0.1 and at most 6 iterations.
        store_ini: Value stored under ``store_ini`` (presumably the initial
            storage -- confirm against the MARRMoT documentation).
    """
    # Build the default here rather than in the signature, so that no
    # mutable dict is shared between calls.
    if solver is None:
        solver = {
            "name": "createOdeApprox_IE",  # IE: Implicit Euler.
            "resnorm_tolerance": float(0.1),
            "resnorm_maxiter": float(6),
        }

    # get the forcing that was created with ESMValTool
    forcing_data = sio.loadmat(forcing_file_loc, mat_dtype=True)

    # end the run at the requested date (only year, month, day are replaced)
    end = period["end"]
    forcing_data["time_end"][0][0:3] = [end.year, end.month, end.day]

    # combine forcing and model parameters
    forcing_data.update(
        model_name=model_name,
        parameters=parameters,
        solver=solver,
        store_ini=store_ini,
    )

    # save as new configuration file
    sio.savemat(config_file_loc, forcing_data)

## Create the model object

In [None]:
# Create the config file and write it to the containerIn directory
write_marrmot_config(
    parameters,
    catchment,
    period,
    forcing_file_loc=FORCING_DIR / forcing_file,
    config_file_loc=containerIn / config_file_name,
)

In [None]:
# Start up the model container and initialize the model using the created
# config file
model = BmiClientDocker(
    image="ewatercycle/marrmot-grpc4bmi:latest",
    image_port=55555,
    input_dir=containerIn,
    output_dir=containerOut,
)
model.initialize(config_file_name)

In [None]:
# Get time information: the model's start and end time as dates
time_units = model.get_time_units()
spinup = num2date(model.get_start_time(), time_units)
end = num2date(model.get_end_time(), time_units)

# Get space information. In BMI the x coordinate of a geographic grid is
# the longitude and the y coordinate is the latitude.
# NOTE(review): the original read lat from get_grid_x and lon from
# get_grid_y; confirm the MARRMoT container follows the BMI convention.
grid = model.get_var_grid(variable)
lon = model.get_grid_x(grid)[0]
lat = model.get_grid_y(grid)[0]

# Daily time steps from the model start time up to and including its end time
time_range = pd.date_range(
    start=spinup.strftime("%Y-%m-%d"),
    end=end.strftime("%Y-%m-%d"),
)

## Run the model

In [None]:
# Step the model through the whole time range and collect the output after
# the spin-up period.
desc = f"Running with parameters:{parameters}"
variable_overtime = []
variable_time = []
for current_time in tqdm(time_range, desc=desc):

    # Store model time and variable output after the spin up period
    if current_time >= period["start"]:

        # store current time of model into variable_time
        now = num2date(
            model.get_current_time(),
            time_units,
            only_use_cftime_datetimes=False
        )
        variable_time.append(now)

        # fetch the model output once, show it, and store its first element
        # (discharge) in variable_overtime
        values = model.get_value(variable)
        print(values)
        variable_overtime.append(values[0])

    # update the model a single timestep
    model.update()
    

In [None]:
# Wrap the collected model output in an xarray DataArray indexed by time,
# with the grid location attached as scalar coordinates
da = xr.DataArray(
    data=np.array(variable_overtime),
    dims=["time"],
    coords=dict(
        longitude=lon,
        latitude=lat,
        time=np.array(variable_time),
    ),
    name=variable,
    attrs=dict(units=model.get_var_units(variable)),
)


In [None]:
# Show the name of the model variable that was extracted.
# NOTE(review): this prints the variable *name*, not the data in `da` -- confirm intent.
print(variable)

In [None]:
#clear up the model object, including shutting down the container
model.finalize()
# drop the name so the finalized client cannot be reused by accident
del model

## Get the catchment area in m2, used to convert the simulated discharge to m3/s

In [None]:
# Read the shapefile of the catchment and take the area from its first record.
# The file name follows the catchment name instead of being hard-coded.
shapefile = DATA_HOME / "marrmot" / "marrmotForcing" / f"{catchment}.shp"
shape = shapereader.Reader(shapefile)
attr = next(shape.records())
# SUB_AREA is scaled by 1e6 to get m2 (presumably it is given in km2 -- TODO confirm)
area = attr.attributes["SUB_AREA"] * 1e6
print("The catchment area is:", area)

## Get GRDC observation data to be used in calibration and comparison

In [None]:
# Fetch the GRDC observations for the station over the test period
# (get_grdc_data is expected to locate the data via the GRDC_DATA_HOME
# environment variable)
observations = get_grdc_data(
    station_id,
    start_date=period["start"].strftime("%Y-%m-%d"),
    end_date=period["end"].strftime("%Y-%m-%d"),
)

# Observed streamflow as a DataFrame column named "GRDC"
grdc_obs = observations.to_dataframe().rename(columns={"streamflow": "GRDC"})

# Location of the GRDC station, taken from the dataset attributes
grdc_lat = observations.attrs["grdc_latitude_in_arc_degree"]
grdc_lon = observations.attrs["grdc_longitude_in_arc_degree"]



## Compare with observations at the GRDC station

In [None]:
# Scale the simulated flux by the catchment area and the conversion factor
# to get the same units as the observations, and put it in a DataFrame
# with a single "marrmot" column
simulations = pd.DataFrame(da.values * area * conversion).rename(
    columns={0: "marrmot"}
)

# Index the simulations by calendar date
simulations.index = pd.to_datetime(da.time.values).date
simulations.index.name = "time"

# Put simulations and GRDC observations side by side.
# NOTE(review): the simulations are indexed by date objects; check that the
# index of grdc_obs has the same type, otherwise the rows will not align.
discharge = pd.concat([simulations, grdc_obs], axis=1, sort=False)

In [None]:
# Plot the simulated and observed daily discharge in one hydrograph,
# annotated with the ME, NSE and SA metrics
hv.plot(
    discharge.loc[:, ["marrmot", "GRDC"]],
    title=f"Daily hydrograph of {catchment}",
    linestyles=["r-", "k-"],
    legend=("Simulated", "Observed"),
    labels=["Date", r"Streamflow $\left(m^{3} s^{-1}\right)$"],
    metrics=["ME", "NSE", "SA"],
    grid=True,
)
plt.show()