In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

from lmfit import Model, Parameters
from scripts.lmfit_model import division_model, sum_model
from scripts.metrics import rmse


### Select files with enough data

In [4]:
# Build one (q_cms, lvl_sm) table per gauge from the raw CSV files. Each table
# is kept in memory in `q_h_dict` (keyed by file stem) and pickled to disk.
processed_data_path = Path("data/HydroReady")
processed_data_path.mkdir(exist_ok=True, parents=True)
q_h_dict = {}
for csv_path in Path("data/HydroFiles/").glob("*.csv"):
    raw = pd.read_csv(csv_path, parse_dates=True, index_col="date")
    q_gaps = raw["q_cms"].isna().sum()
    h_gaps = raw["lvl_sm"].isna().sum()
    # A file is skipped only when BOTH columns have more than 2000 missing values.
    # NOTE(review): a file with a single sparse column still passes - confirm
    # that "and" (rather than "or") is the intended rule.
    if q_gaps > 2000 and h_gaps > 2000:
        continue

    # Maximum of each column inside consecutive 10-day bins of the date index.
    ten_day_max = raw[["q_cms", "lvl_sm"]].groupby(pd.Grouper(freq="10D")).max()
    # Non-positive values are treated as missing and removed by dropna() below.
    ten_day_max[ten_day_max <= 0] = np.nan
    qh_table = ten_day_max.sort_values(by="q_cms", ascending=False).dropna()

    q_h_dict[csv_path.stem] = qh_table
    qh_table.to_pickle(processed_data_path / f"{csv_path.stem}.pkl")


### Plot full range Q(h)

In [12]:
# Write one Q(h) scatter plot per gauge, using every row of its table.
image_storage = Path("data/initial_curves/")
image_storage.mkdir(exist_ok=True, parents=True)
for gauge_id, data_file in tqdm(q_h_dict.items()):
    qh_scatter = data_file.plot.scatter(x="q_cms", y="lvl_sm", c="Red")
    figure = qh_scatter.get_figure()
    figure.savefig(f"{image_storage}/{gauge_id}.png")
    # Close the figure once it is on disk so figures do not pile up across
    # iterations of the loop.
    plt.close(figure)


  0%|          | 0/1884 [00:00<?, ?it/s]

### Open water period (April-October)

In [13]:
# Write one Q(h) scatter plot per gauge, restricted to rows whose index month
# is 4..10 (April through October).
image_storage = Path("data/open_river_curves/")
image_storage.mkdir(exist_ok=True, parents=True)
for gauge_id, data_file in tqdm(q_h_dict.items()):
    in_open_season = data_file.index.month.isin(range(4, 11))
    open_season_rows = data_file.loc[in_open_season]
    qh_scatter = open_season_rows.plot.scatter(x="q_cms", y="lvl_sm", c="Red")
    figure = qh_scatter.get_figure()
    figure.savefig(f"{image_storage}/{gauge_id}.png")
    # Close the figure once it is on disk so figures do not pile up across
    # iterations of the loop.
    plt.close(figure)


  0%|          | 0/1884 [00:00<?, ?it/s]

### Create Q(h) for each gauge

In [14]:
# Fit a Q(h) curve to the full record of every gauge. Fitted parameters are
# dumped to JSON only when the fit error stays below 60.0.
qh_storage_full = Path("data/params/qh_full")
qh_storage_full.mkdir(exist_ok=True, parents=True)

# Number of gauges that were skipped (fewer than 4 rows) or whose fit was rejected.
counter = 0
for gauge_id, data_file in tqdm(q_h_dict.items()):
    data_file = data_file.sort_values(by="q_cms", ascending=False)
    if len(data_file) < 4:
        counter += 1
        continue
    x, y = data_file["q_cms"].to_numpy(), data_file["lvl_sm"].to_numpy()
    try:
        # First attempt: division_model, every parameter starts at 1 and has
        # only a lower bound.
        use_model = division_model
        params = Parameters()
        for param_name, lower_bound in (
            ("power", 0.01),
            ("c", 0.1),
            ("slope", 0.01),
            ("b", 0.1),
        ):
            params.add(param_name, value=1, min=lower_bound, vary=True)
        result = use_model.fit(y, params, x=x)
        error_res = rmse(y_true=y, y_pred=result.best_fit)
        # Written as "<" on purpose: a NaN error compares False and is rejected.
        fit_accepted = error_res < 60.0
        if fit_accepted:
            with open(f"{qh_storage_full}/{gauge_id}_div.json", "w") as f:
                result.params.dump(f)
        else:
            counter += 1
            print(f"Bullshit ! for Gauge with ID {gauge_id} error is {error_res:.2f}")
    except ValueError:
        # Fallback: a ValueError anywhere in the first attempt switches to
        # sum_model, with "power" and "slope" additionally capped at 3.
        use_model = sum_model
        params = Parameters()
        for param_name, limits in (
            ("power", {"min": 0.01, "max": 3, "brute_step": 0.1}),
            ("c", {"min": 0.1}),
            ("slope", {"min": 0.001, "max": 3, "brute_step": 0.1}),
            ("b", {"min": 0.1}),
        ):
            params.add(param_name, value=1, vary=True, **limits)
        result = use_model.fit(y, params, x=x)

        error_res = rmse(y_true=y, y_pred=result.best_fit)
        fit_accepted = error_res < 60.0
        if fit_accepted:
            with open(f"{qh_storage_full}/{gauge_id}_sum.json", "w") as f:
                result.params.dump(f)
        else:
            # Rejected fallback fits are counted but not reported.
            counter += 1
print(f"\nShit data for {counter} gauges out from {len(q_h_dict.keys())}")


  0%|          | 0/1884 [00:00<?, ?it/s]

Bullshit ! for Gauge with ID 84173 error is 168.49
Bullshit ! for Gauge with ID 84399 error is 103.14
Bullshit ! for Gauge with ID 84039 error is 148.52
Bullshit ! for Gauge with ID 84213 error is 112.05
Bullshit ! for Gauge with ID 84401 error is 82.17
Bullshit ! for Gauge with ID 84398 error is 72.79
Bullshit ! for Gauge with ID 84165 error is 187.52
Bullshit ! for Gauge with ID 9302 error is 71.52
Bullshit ! for Gauge with ID 84824 error is 61.96
Bullshit ! for Gauge with ID 84029 error is 110.62
Bullshit ! for Gauge with ID 84200 error is 82.21
Bullshit ! for Gauge with ID 84215 error is 124.71
Bullshit ! for Gauge with ID 5217 error is 62.91
Bullshit ! for Gauge with ID 83441 error is 69.87
Bullshit ! for Gauge with ID 84448 error is 63.16
Bullshit ! for Gauge with ID 84064 error is 66.18
Bullshit ! for Gauge with ID 84449 error is 92.63
Bullshit ! for Gauge with ID 11487 error is 73.67
Bullshit ! for Gauge with ID 75511 error is 125.94
Bullshit ! for Gauge with ID 19235 error is 

### Q(h) for open river (April-October)

In [5]:
# Fit a Q(h) curve per gauge using only rows whose index month is 4..10
# (April through October). Fitted parameters are dumped to JSON only when the
# fit error stays below 60.0.
# The variable keeps the name `qh_storage_full` although it points at the
# open-water directory.
qh_storage_full = Path("data/params/qh_open")
qh_storage_full.mkdir(exist_ok=True, parents=True)

# Number of gauges that were skipped (fewer than 4 rows) or whose fit was rejected.
counter = 0
for gauge_id, data_file in tqdm(q_h_dict.items()):
    in_open_season = data_file.index.month.isin(range(4, 11))
    data_file = data_file.loc[in_open_season].sort_values(by="q_cms", ascending=False)
    if len(data_file) < 4:
        counter += 1
        continue
    x, y = data_file["q_cms"].to_numpy(), data_file["lvl_sm"].to_numpy()
    try:
        # First attempt: division_model, every parameter starts at 1 and has
        # only a lower bound.
        use_model = division_model
        params = Parameters()
        for param_name, lower_bound in (
            ("power", 0.01),
            ("c", 0.1),
            ("slope", 0.01),
            ("b", 0.1),
        ):
            params.add(param_name, value=1, min=lower_bound, vary=True)
        result = use_model.fit(y, params, x=x)
        error_res = rmse(y_true=y, y_pred=result.best_fit)
        # Written as "<" on purpose: a NaN error compares False and is rejected.
        fit_accepted = error_res < 60.0
        if fit_accepted:
            with open(f"{qh_storage_full}/{gauge_id}_div.json", "w") as f:
                result.params.dump(f)
        else:
            counter += 1
            print(f"Bullshit ! for Gauge with ID {gauge_id} error is {error_res:.2f}")
    except ValueError:
        # Fallback: a ValueError anywhere in the first attempt switches to
        # sum_model, with "power" and "slope" additionally capped at 3.
        use_model = sum_model
        params = Parameters()
        for param_name, limits in (
            ("power", {"min": 0.01, "max": 3, "brute_step": 0.1}),
            ("c", {"min": 0.1}),
            ("slope", {"min": 0.001, "max": 3, "brute_step": 0.1}),
            ("b", {"min": 0.1}),
        ):
            params.add(param_name, value=1, vary=True, **limits)
        result = use_model.fit(y, params, x=x)

        error_res = rmse(y_true=y, y_pred=result.best_fit)
        fit_accepted = error_res < 60.0
        if fit_accepted:
            with open(f"{qh_storage_full}/{gauge_id}_sum.json", "w") as f:
                result.params.dump(f)
        else:
            # Rejected fallback fits are counted but not reported.
            counter += 1
print(f"\nShit data for {counter} gauges out from {len(q_h_dict.keys())}")


  0%|          | 0/1884 [00:00<?, ?it/s]

Bullshit ! for Gauge with ID 9499 error is 62.68
Bullshit ! for Gauge with ID 84173 error is 174.23
Bullshit ! for Gauge with ID 77164 error is 64.09
Bullshit ! for Gauge with ID 84399 error is 101.96
Bullshit ! for Gauge with ID 84039 error is 160.21
Bullshit ! for Gauge with ID 84213 error is 113.36
Bullshit ! for Gauge with ID 84401 error is 78.90
Bullshit ! for Gauge with ID 84398 error is 77.76
Bullshit ! for Gauge with ID 84165 error is 193.82
Bullshit ! for Gauge with ID 9302 error is 67.34
Bullshit ! for Gauge with ID 84824 error is 64.52
Bullshit ! for Gauge with ID 84029 error is 111.42
Bullshit ! for Gauge with ID 84215 error is 122.49
Bullshit ! for Gauge with ID 5217 error is 64.57
Bullshit ! for Gauge with ID 83441 error is 65.04
Bullshit ! for Gauge with ID 84448 error is 67.77
Bullshit ! for Gauge with ID 84064 error is 63.92
Bullshit ! for Gauge with ID 84449 error is 96.71
Bullshit ! for Gauge with ID 11487 error is 93.49
Bullshit ! for Gauge with ID 75511 error is 13