In [10]:
import pandas as pd
import plotly.express as px

from src.utils import *
from src.features.base import *
from src.features.volume_obs import *
from src.features.swe import *
from src.data.base import *
from src.config import *
from src.models.postprocess import *
from src.models.lgb import *
from src.models.ensemble import *

import importlib

EXP_NAME = 'lgb_sweK9L2S1_diffp_S4_m3_ff'
cfg = importlib.import_module(f"configs.lgb.{EXP_NAME}").cfg

# Evaluation

In [11]:
exp_list = [
    "lgb_sweK9L2S1_diffp_S4_m3_ff_s1024",
    "lgb_sweK9L2S1_diffp_S4_m3_ff_s3024",
    "lgb_sweK9L2S1_diffp_S4_m3_ff_s4024",
    "lgb_sweK9L2S1_diffp_S4_m3_ff_s5024",
    "lgb_sweK9L2S1_diffp_S4_m3_ff_s6024",
    "lgb_sweK9L2S1_diffp_S4_m3_ff_s7024",
    "lgb_sweK9L2S1_diffp_S4_m3_ff_s8024",
    "lgb_sweK9L2S1_diffp_S4_m3_ff_s9024",
    "lgb_sweK9L2S1_diffp_S4_m3_ff",
]

groupby_cols=["site_id", "year", "month", "day", "md_id"]

df_pred_val_all = []
df_pred_test_all = []
for exp_name in exp_list:
    df_pred = pd.read_csv(f"runs/new/{exp_name}/pred.csv").query('(site_id=="detroit_lake_inflow" & md_id>=24)==False')
    df_pred['pred_volume_50'] = df_pred['pred_volume_reg']

    df_pred_val = df_pred.query('volume==volume')
    df_pred_test = df_pred.query('volume!=volume')
    if len(exp_list) == 1:
        df_pred_test = mean_ensemble(df_pred_test, groupby_cols=groupby_cols)


    df_pred_val_all.append(df_pred_val)
    df_pred_test_all.append(df_pred_test)

df_pred_val_all = pd.concat(df_pred_val_all)
df_pred_test_all = pd.concat(df_pred_test_all)
df_pred_val_all = clip_prediction(rearrange_prediction(df_pred_val_all))
df_pred_test_all = clip_prediction(rearrange_prediction(df_pred_test_all))

df_pred_test_all = clip_prediction(rearrange_prediction(df_pred_test_all))
df_pred_val_ens = custom_ensemble(df_pred_val_all, groupby_cols=groupby_cols)
df_pred_test_ens = custom_ensemble(df_pred_test_all, groupby_cols=groupby_cols)
df_pred_val_ens["cat"] = "val"

df_pred_val_ens_pp = use_previous_forecast_sites(df_pred_val_ens,
                                                 months=[5,6,7],
                                                 cols=["pred_volume_10", "pred_volume_50"])
df_pred_test_ens_pp = use_previous_forecast_sites(df_pred_test_ens,
                                                  months=[5,6,7],
                                                  cols=["pred_volume_10", "pred_volume_50"])

In [12]:
eval_all(df_pred_val_ens, [["cat"], ["year"], ["month"]])

['cat']


Unnamed: 0_level_0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
val,7240.0,83.637944,47.621998,133.039327,70.252506,0.813536,278.146181,0.952545,22.022445,-5.447633,885.275985,890.723617




['year']


Unnamed: 0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
2004,724.0,93.78152,41.839902,162.597148,76.90751,0.707182,347.9356,0.845416,30.984727,-145.539152,639.101182,784.640334
2006,724.0,93.903268,51.195349,153.286,77.228455,0.875691,278.159099,0.96029,22.834651,-3.684945,1093.336039,1097.020983
2008,724.0,78.927531,41.818999,126.214978,68.748615,0.899171,279.845529,0.943542,14.553012,-58.223248,939.324271,997.547519
2010,724.0,79.255409,50.922469,134.446672,52.397087,0.85221,243.270992,0.932883,15.029557,75.65262,836.593094,760.940474
2012,724.0,122.779,59.490074,184.449257,124.397669,0.69337,460.61242,0.926972,28.628723,106.036524,955.773917,849.737394
2014,724.0,75.426052,49.864558,108.628092,67.785505,0.83011,219.760334,0.974765,19.059203,31.633709,935.756956,904.123247
2016,724.0,72.284886,32.786864,118.299653,65.768141,0.871547,190.370367,0.966238,18.78011,-81.56189,794.249193,875.811083
2018,724.0,80.554652,53.011154,136.816667,51.836136,0.838398,263.806986,0.965854,17.732259,47.943698,998.41589,950.472192
2020,724.0,61.369789,47.149556,89.124357,47.835455,0.805249,153.857099,0.985338,20.357651,33.639777,851.73058,818.090803
2022,724.0,78.097329,48.141054,116.530447,69.620485,0.762431,218.544278,0.970379,32.264557,-60.373421,808.478724,868.852144




['month']


Unnamed: 0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
1,1040.0,139.230448,78.207786,222.078586,117.404971,0.7625,404.184615,0.899293,39.818489,-15.760463,883.286327,899.04679
2,1040.0,125.355153,65.937411,202.472825,107.655221,0.754808,372.585196,0.914424,35.194978,-16.00945,883.286327,899.295776
3,1040.0,107.261597,60.177649,171.974314,89.632829,0.770192,316.102269,0.938404,29.739306,0.45938,883.286327,882.826947
4,1040.0,80.621209,47.136214,125.983393,68.74402,0.859615,242.477728,0.963755,21.163628,-4.56934,883.286327,887.855667
5,1040.0,64.740472,40.879151,100.264364,53.077901,0.853846,210.75727,0.972618,14.547004,10.837113,883.286327,872.449213
6,1040.0,44.641816,27.119528,70.945075,35.860845,0.835577,163.541658,0.983512,8.932513,-7.257769,883.286327,890.544096
7,1000.0,21.21399,12.547217,33.73743,17.357322,0.86,82.320167,0.995968,4.070747,-5.848311,897.691448,903.539759
0,1034.285714,83.294955,47.429279,132.493712,69.961873,0.813791,255.995558,0.952568,21.923809,-5.449834,885.344201,890.794036
0,15.118579,43.150451,22.820395,69.646006,37.137701,0.048862,115.546568,0.036199,13.53961,9.329172,5.444624,10.889938






In [13]:
eval_all(df_pred_val_ens_pp, [["cat"], ["year"], ["month"]])

['cat']


Unnamed: 0_level_0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
val,7240.0,83.667988,47.521319,133.23014,70.252506,0.812017,279.069543,0.952229,22.134178,-7.601584,885.275985,892.877569




['year']


Unnamed: 0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
2004,724.0,93.686901,41.671869,162.481325,76.90751,0.698895,347.686116,0.845637,31.281404,-147.986161,639.101182,787.087344
2006,724.0,93.339305,50.290173,152.499286,77.228455,0.879834,278.168773,0.960288,23.005561,-9.761977,1093.336039,1103.098016
2008,724.0,79.430366,43.982589,125.559894,68.748615,0.885359,278.219549,0.944197,14.496934,-60.922389,939.324271,1000.24666
2010,724.0,78.836946,51.04996,133.063791,52.397087,0.860497,239.513541,0.93494,14.822959,78.344922,836.593094,758.248172
2012,724.0,123.235948,59.295588,186.014588,124.397669,0.703039,466.710734,0.925025,28.784521,107.580614,955.773917,848.193303
2014,724.0,74.440486,48.195462,107.340491,67.785505,0.827348,218.967234,0.974947,19.011811,28.036324,935.756956,907.720632
2016,724.0,73.123488,33.095201,120.507122,65.768141,0.874309,192.515374,0.965472,18.891726,-84.42952,794.249193,878.678713
2018,724.0,81.322076,52.873521,139.256571,51.836136,0.835635,267.200526,0.96497,17.968111,40.886234,998.41589,957.529655
2020,724.0,60.40915,46.485448,86.906547,47.835455,0.794199,151.761222,0.985735,20.568494,30.177246,851.73058,821.553334
2022,724.0,78.855215,48.273377,118.671784,69.620485,0.76105,220.241746,0.969917,32.51026,-57.941136,808.478724,866.41986




['month']


Unnamed: 0,n,mpl,mpl10,mpl50,mpl90,int_cvr,rmse,r2,mape,bias,actual_mean,pred_mean
1,1040.0,139.230448,78.207786,222.078586,117.404971,0.7625,404.184615,0.899293,39.818489,-15.760463,883.286327,899.04679
2,1040.0,125.355153,65.937411,202.472825,107.655221,0.754808,372.585196,0.914424,35.194978,-16.00945,883.286327,899.295776
3,1040.0,107.261597,60.177649,171.974314,89.632829,0.770192,316.102269,0.938404,29.739306,0.45938,883.286327,882.826947
4,1040.0,80.621209,47.136214,125.983393,68.74402,0.859615,242.477728,0.963755,21.163628,-4.56934,883.286327,887.855667
5,1040.0,64.22393,39.807654,99.786234,53.077901,0.845192,212.165823,0.972251,15.248622,3.574213,883.286327,879.712114
6,1040.0,45.52129,28.608242,72.094783,35.860845,0.826923,171.31546,0.981908,9.073472,-9.861943,883.286327,893.14827
7,1000.0,21.054063,11.384396,34.420471,17.357322,0.867,84.704794,0.995731,4.003415,-11.181164,897.691448,908.872612
0,1034.285714,83.323956,47.322764,132.68723,69.961873,0.812319,257.647984,0.952252,22.034559,-7.621252,885.344201,892.965454
0,15.118579,43.096529,22.963555,69.353606,37.137701,0.048451,113.843362,0.035892,13.470631,7.683874,5.444624,10.284916






In [14]:
eval_agg(df_pred_val_ens_pp, ["site_id"], is_include_mean_std=False).assign(
    nmpl = lambda x: x['mpl'] / x['actual_mean']
)[['mpl','mpl10','mpl50','mpl90','int_cvr','nmpl']].sort_values("nmpl")

Unnamed: 0_level_0,mpl,mpl10,mpl50,mpl90,int_cvr,nmpl
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
stehekin_r_at_stehekin,42.141795,29.696864,59.998908,36.729612,0.889286,0.056187
hungry_horse_reservoir_inflow,137.025956,81.777182,215.326614,113.974074,0.792857,0.062042
snake_r_nr_heise,204.908219,132.718256,336.910906,145.095496,0.771429,0.064697
boise_r_nr_boise,91.969968,56.925284,144.745782,74.238839,0.853571,0.076185
libby_reservoir_inflow,464.215596,241.44571,706.261819,444.939259,0.775,0.081576
weber_r_nr_oakley,7.935088,4.196735,12.660409,6.94812,0.839286,0.085616
yampa_r_nr_maybell,76.344874,44.397182,127.644369,56.99307,0.935714,0.091579
skagit_ross_reservoir,131.351764,68.654821,205.002623,120.397847,0.664286,0.092889
ruedi_reservoir_inflow,11.358137,6.103665,17.128755,10.841991,0.867857,0.096365
green_r_bl_howard_a_hanson_dam,25.387121,15.22995,41.582463,19.348951,0.75,0.096887


In [15]:
px.box(
    eval_agg(df_pred_val_ens, ["year","month"]).reset_index(),
    x='month',
    y=['mpl']
)

In [16]:
px.line(
    eval_agg(df_pred_val_ens, ["md_id"]).reset_index(),
    x='md_id',
    y=['mpl','mpl10','mpl50','mpl90']
)

In [17]:
generate_hindcast_submission(
    df_pred_test_ens_pp, dirname="data/sub", fname="lgb_sweK9L2S1_diffp_S4_m3_ff_ens9_pfs"
)

Unnamed: 0,site_id,issue_date,volume_10,volume_50,volume_90
0,hungry_horse_reservoir_inflow,2005-01-01,1260.112135,1793.995062,2318.676400
1,hungry_horse_reservoir_inflow,2005-01-08,1215.480662,1683.195117,2159.865551
2,hungry_horse_reservoir_inflow,2005-01-15,1292.345248,1679.669768,2032.373531
3,hungry_horse_reservoir_inflow,2005-01-22,1344.939706,1666.955377,1972.403307
4,hungry_horse_reservoir_inflow,2005-02-01,1143.127932,1487.324787,1864.247985
...,...,...,...,...,...
7235,owyhee_r_bl_owyhee_dam,2023-06-22,479.469397,522.073457,504.796418
7236,owyhee_r_bl_owyhee_dam,2023-07-01,515.715487,523.833327,534.240828
7237,owyhee_r_bl_owyhee_dam,2023-07-08,515.715487,523.833327,533.949848
7238,owyhee_r_bl_owyhee_dam,2023-07-15,515.715487,523.833327,534.816393
