In [1]:
import pandas as pd
import plotly.express as px

from src.utils import *
from src.features.base import *
from src.features.volume_obs import *
from src.features.swe import *
from src.data.base import *
from src.config import *
from src.models.postprocess import *
from src.models.lgb import *
from src.models.ensemble import *

import importlib

# Name of the experiment under evaluation; it is also the name of its config module.
EXP_NAME = 'lgb_sweK9L2S1_diffp_S4_m3_ff'
# Import module `configs.lgb.<EXP_NAME>` dynamically and take its `cfg` attribute.
cfg = importlib.import_module(f"configs.lgb.{EXP_NAME}").cfg

# Evaluation

In [2]:
# Experiments to evaluate; each one must provide runs/new/<exp_name>/pred.csv.
exp_list = [
    "lgb_sweK9L2S1_diffp_S4_m3_ff",
]

# Key columns handed to the ensembling helpers as `groupby_cols`.
groupby_cols = ["site_id", "year", "month", "day", "md_id"]

# Loop-invariant: test rows are mean-ensembled per experiment only when a
# single experiment is being evaluated.
is_single_exp = len(exp_list) == 1

# Per-experiment frames are collected in lists and concatenated once below, so
# the `df_pred_*_all` names only ever refer to DataFrames.
val_frames = []
test_frames = []
for exp_name in exp_list:
    df_pred = (
        pd.read_csv(f"runs/new/{exp_name}/pred.csv")
        # Exclude detroit_lake_inflow rows with md_id >= 24.
        .query('(site_id=="detroit_lake_inflow" & md_id>=24)==False')
        # Use the regression output as the 50% prediction. `assign` returns a
        # new frame, so nothing is written onto the filtered result of `query`
        # (which is what triggers SettingWithCopyWarning).
        .assign(pred_volume_50=lambda d: d['pred_volume_reg'])
    )

    # NaN never equals itself: `volume==volume` keeps rows with a known target
    # (validation), `volume!=volume` keeps rows whose target is missing (test).
    df_pred_val = df_pred.query('volume==volume')
    df_pred_test = df_pred.query('volume!=volume')
    if is_single_exp:
        df_pred_test = mean_ensemble(df_pred_test, groupby_cols=groupby_cols)

    val_frames.append(df_pred_val)
    test_frames.append(df_pred_test)

# Post-process each split exactly once with the project helpers
# (rearrange_prediction, then clip_prediction).
df_pred_val_all = clip_prediction(rearrange_prediction(pd.concat(val_frames)))
df_pred_test_all = clip_prediction(rearrange_prediction(pd.concat(test_frames)))

df_pred_val_ens = custom_ensemble(df_pred_val_all, groupby_cols=groupby_cols)
df_pred_test_ens = custom_ensemble(df_pred_test_all, groupby_cols=groupby_cols)
# Constant label column so the evaluation cells can group the whole
# validation set under a single "cat" key.
df_pred_val_ens["cat"] = "val"

# Variant with `use_previous_forecast_sites` applied to the 10% and 50%
# predictions for months 5-7.
# NOTE(review): the helper's exact rule is defined elsewhere in the project.
df_pred_val_ens_pp = use_previous_forecast_sites(
    df_pred_val_ens,
    months=[5, 6, 7],
    cols=["pred_volume_10", "pred_volume_50"],
)
df_pred_test_ens_pp = use_previous_forecast_sites(
    df_pred_test_ens,
    months=[5, 6, 7],
    cols=["pred_volume_10", "pred_volume_50"],
)

In [3]:
# Validation metrics for the ensemble before `use_previous_forecast_sites`,
# reported once per grouping: "cat", "year" and "month".
eval_all(
    df_pred_val_ens,
    [["cat"], ["year"], ["month"]],
)

['cat']


Unnamed: 0_level_0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
val,7240.0,85.276281,48.893875,134.320422,72.614547,0.723066,282.235382,0.951139,22.122455,-3.950486,885.275985,889.22647




['year']


Unnamed: 0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
2004,724.0,94.825265,53.231869,160.667479,70.576447,0.611878,347.201301,0.846068,30.959518,-142.832142,639.101182,781.933325
2006,724.0,96.81323,48.920824,156.158999,85.359866,0.759669,284.085569,0.95858,22.704459,0.849045,1093.336039,1092.486994
2008,724.0,80.957513,48.269217,132.189472,62.41385,0.849448,299.13278,0.935492,14.73327,-59.274641,939.324271,998.598911
2010,724.0,80.909564,48.153076,135.195107,59.38051,0.770718,246.151648,0.931284,15.101639,75.551127,836.593094,761.041967
2012,724.0,130.30635,58.615497,185.402724,146.900828,0.578729,462.462636,0.926384,29.226131,107.65683,955.773917,848.117087
2014,724.0,77.30876,48.882202,111.163567,71.880511,0.729282,227.237561,0.973018,19.130651,33.681371,935.756956,902.075585
2016,724.0,69.91311,32.656094,117.288045,59.79519,0.808011,191.466034,0.965848,18.540281,-80.20814,794.249193,874.457334
2018,724.0,83.440139,53.142524,139.438123,57.739771,0.714088,270.993451,0.963969,17.925362,49.227589,998.41589,949.1883
2020,724.0,61.164419,44.874649,90.20244,48.416169,0.714088,154.969502,0.985125,20.630065,34.19034,851.73058,817.540241
2022,724.0,77.124465,52.192804,115.498264,63.682325,0.694751,213.595089,0.971706,32.273172,-58.346234,808.478724,866.824958




['month']


Unnamed: 0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
1,1040.0,140.248998,78.130851,223.453787,119.162356,0.689423,405.925426,0.898424,40.30158,-14.121495,883.286327,897.407822
2,1040.0,127.938332,68.129041,202.244949,113.441004,0.686538,376.215812,0.912748,34.873206,-10.848257,883.286327,894.134584
3,1040.0,110.586618,61.683744,173.409391,96.666719,0.697115,321.903055,0.936122,29.675629,3.121817,883.286327,880.16451
4,1040.0,81.219418,47.427379,128.366111,67.864763,0.736538,251.284864,0.961075,21.336568,-4.562177,883.286327,887.848504
5,1040.0,65.513217,41.608917,102.144403,52.786332,0.779808,214.149614,0.971729,14.768945,12.224875,883.286327,871.061452
6,1040.0,46.582432,29.792183,72.578253,37.37686,0.730769,169.493878,0.98229,9.110399,-7.980863,883.286327,891.267189
7,1000.0,22.427702,14.148658,34.195086,18.939363,0.742,84.50398,0.995751,4.097593,-5.548772,897.691448,903.24022
0,1034.285714,84.93096,48.702968,133.770283,72.319628,0.72317,260.496661,0.951163,22.023417,-3.959267,885.344201,889.303469
0,15.118579,43.466507,22.418162,69.512895,38.621744,0.034055,115.324924,0.036427,13.533569,8.96159,5.444624,10.843724






In [4]:
# Same report for the frame returned by `use_previous_forecast_sites`,
# grouped by "cat", "year" and "month".
eval_all(
    df_pred_val_ens_pp,
    [["cat"], ["year"], ["month"]],
)

['cat']


Unnamed: 0_level_0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
val,7240.0,85.470096,49.343833,134.451909,72.614547,0.718094,283.442046,0.950721,22.228853,-5.966258,885.275985,891.242243




['year']


Unnamed: 0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
2004,724.0,94.353383,52.918704,159.564998,70.576447,0.611878,346.552922,0.846642,31.197308,-145.766299,639.101182,784.867482
2006,724.0,96.039181,47.956693,154.800982,85.359866,0.752762,283.6319,0.958712,22.862883,-4.716577,1093.336039,1098.052616
2008,724.0,82.897628,53.452617,132.826417,62.41385,0.839779,302.144821,0.934186,14.752746,-63.089603,939.324271,1002.413874
2010,724.0,80.184701,48.22269,132.950903,59.38051,0.774862,241.701445,0.933746,14.856473,79.110658,836.593094,757.482435
2012,724.0,130.844896,58.565178,187.068683,146.900828,0.581492,466.937824,0.924952,29.421998,109.269503,955.773917,846.504414
2014,724.0,76.533223,47.588761,110.130396,71.880511,0.718232,226.634293,0.973161,19.079516,30.364246,935.756956,905.39271
2016,724.0,71.110915,33.29795,120.239606,59.79519,0.808011,195.280937,0.964473,18.751495,-83.789192,794.249193,878.038386
2018,724.0,85.136005,55.350266,142.317979,57.739771,0.701657,276.498819,0.96249,18.18921,42.184398,998.41589,956.231492
2020,724.0,60.406998,44.270633,88.534191,48.416169,0.700276,153.680392,0.985372,20.814821,31.744679,851.73058,819.985901
2022,724.0,77.194035,51.81484,116.084938,63.682325,0.691989,213.398521,0.971758,32.36208,-54.974394,808.478724,863.453118




['month']


Unnamed: 0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
1,1040.0,140.248998,78.130851,223.453787,119.162356,0.689423,405.925426,0.898424,40.30158,-14.121495,883.286327,897.407822
2,1040.0,127.938332,68.129041,202.244949,113.441004,0.686538,376.215812,0.912748,34.873206,-10.848257,883.286327,894.134584
3,1040.0,110.586618,61.683744,173.409391,96.666719,0.697115,321.903055,0.936122,29.675629,3.121817,883.286327,880.16451
4,1040.0,81.219418,47.427379,128.366111,67.864763,0.736538,251.284864,0.961075,21.336568,-4.562177,883.286327,887.848504
5,1040.0,64.768052,42.15239,99.365434,52.786332,0.760577,212.85203,0.972071,15.403525,5.541618,883.286327,877.744709
6,1040.0,48.59642,32.954286,75.458112,37.37686,0.721154,183.605971,0.979219,9.259937,-10.455941,883.286327,893.742268
7,1000.0,22.511348,13.552552,35.042127,18.939363,0.736,86.468233,0.995551,4.052433,-10.618297,897.691448,908.309745
0,1034.285714,85.124169,49.147178,133.905702,72.319628,0.718192,262.607913,0.950744,22.128983,-5.991819,885.344201,891.33602
0,15.118579,43.213416,22.130679,69.117651,38.621744,0.028086,113.161338,0.035998,13.465042,7.628146,5.444624,10.490355






In [5]:
# Per-site validation metrics, sorted ascending by nmpl = mpl / actual_mean
# (mpl divided by the site's mean actual value).
report_cols = ['mpl', 'mpl10', 'mpl50', 'mpl90', 'int_cvr', 'nmpl']
(
    eval_agg(df_pred_val_ens_pp, ["site_id"], is_include_mean_std=False)
    .assign(nmpl=lambda m: m['mpl'] / m['actual_mean'])
    [report_cols]
    .sort_values("nmpl")
)

Unnamed: 0_level_0,mpl,mpl10,mpl50,mpl90,int_cvr,nmpl
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
stehekin_r_at_stehekin,43.928149,27.272857,62.399451,42.112139,0.771429,0.058569
hungry_horse_reservoir_inflow,142.036349,79.411183,216.594891,130.102974,0.642857,0.06431
snake_r_nr_heise,209.576339,131.191851,342.568846,154.96832,0.707143,0.066171
boise_r_nr_boise,90.831721,56.358211,145.499511,70.637441,0.771429,0.075242
weber_r_nr_oakley,7.58949,4.450073,12.284327,6.034071,0.735714,0.081888
libby_reservoir_inflow,482.755408,269.761818,728.591169,449.913238,0.707143,0.084834
yampa_r_nr_maybell,75.144611,43.540061,128.14851,53.745262,0.85,0.090139
ruedi_reservoir_inflow,10.6658,6.623867,17.249044,8.124489,0.767857,0.090491
skagit_ross_reservoir,134.091435,72.884923,202.043995,127.345386,0.542857,0.094826
animas_r_at_durango,29.725159,18.466145,45.990438,24.718893,0.778571,0.096769


In [6]:
# Box plot of validation `mpl` per month, built from metrics aggregated by
# ("year", "month").
# NOTE(review): the per-site table passes is_include_mean_std=False but this
# call relies on the default; if the default appends mean/std summary rows they
# would be plotted as data points -- confirm against eval_agg.
px.box(
    eval_agg(df_pred_val_ens, ["year", "month"]).reset_index(),
    x='month',
    y=['mpl'],
    title="Validation mpl by month",
)

In [7]:
# Line plot of the validation mpl metrics (overall and the 10/50/90 columns)
# against `md_id`.
# NOTE(review): the per-site table passes is_include_mean_std=False but this
# call relies on the default; if the default appends mean/std summary rows they
# would be drawn as extra points -- confirm against eval_agg.
px.line(
    eval_agg(df_pred_val_ens, ["md_id"]).reset_index(),
    x='md_id',
    y=['mpl', 'mpl10', 'mpl50', 'mpl90'],
    title="Validation mpl metrics by md_id",
)