# Figures for the paper ' '


In [None]:
# Preparation of the notebook environment.
# (Import libraries, upload the data, define the auxiliary functions, and set the variables.)
import sys
import os
sys.path.append('..')
import pandas as pd
import numpy as np

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from preprocessing.advanced_transforms import load_characteristics # For the DMA characteristics
from wflib.evaluator import WaterFuturesEvaluator
# Per-DMA characteristics table: its index is iterated as the list of DMAs and
# its "h_mean" column is used as the scaling factor in the figure cells below.
dmas_characteristics = load_characteristics()
# Evaluator object; later cells read `wfe.demand` and `wfe.models[...]['forecasts']`.
wfe = WaterFuturesEvaluator(
    data_dir= os.path.join('data')
) # This loads all results in the results folder

# Hour counts used to size the hour-of-week axes.
H_PER_DAY__k = 24
H_PER_WEEK__k = 7 * H_PER_DAY__k

In [None]:
# Overwrite these variables for the figure appearance
# Canvas size in pixels (read by fix_layout)
img__height = 1600
img__width = 1200

# Subplot grid: one panel per DMA
img__rows = 5
img__cols = 2

# Keyword arguments for make_subplots. The grid size is taken from img__rows /
# img__cols so that overriding those two variables is enough to change the grid.
subplot_settings = dict(
    rows=img__rows, cols=img__cols,
    shared_xaxes=True, shared_yaxes=True,
    horizontal_spacing=0.025, vertical_spacing=0.025,
    subplot_titles=dmas_characteristics.index,
    x_title=None, y_title=None
)

# Text styling shared by title, axes and legend
img__font_family = "Lato"
img__font_color = "black"
img__font_size = 14

img__title = "Title"
img__title_font_size = 22

# Axis line, zero line and grid line styling
img__line_color = "grey"
img__zline_color = "black"
img__glines_color = "lightgrey"
img__axis_width = 1

def get_axis_custom_ticks(min_val, max_val, steps=5):
    """Build the range, major ticks and minor ticks of an evenly divided axis.

    Args:
        min_val: Lower end of the axis.
        max_val: Upper end of the axis.
        steps: Number of evenly spaced major ticks, both ends included.

    Returns:
        A 3-tuple ``(axis_range, major_ticks, minor_ticks)``: the two-element
        range ``[min_val, max_val]``, the ``steps`` major tick positions, and
        ``2 * steps - 1`` minor tick positions over the same interval.
    """
    axis_range = [min_val, max_val]
    major_ticks = list(np.linspace(min_val, max_val, steps))
    # Twice the resolution: every major tick plus the midpoint between neighbours
    minor_ticks = list(np.linspace(min_val, max_val, 2 * steps - 1))
    return (axis_range, major_ticks, minor_ticks)

# Default x-axis: weekday labels over one week of hours
img__xaxis_title = "X-Axis"
img__xaxis_ticktext = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# 8 evenly spaced major ticks between 0 and H_PER_WEEK__k
# NOTE(review): 8 tick positions but only 7 labels — confirm how the last tick should be labelled
img__xaxis_range, img__xaxis_tickvals, img__xaxis_subticks = get_axis_custom_ticks(0, H_PER_WEEK__k, 8)

# Default y-axis: 6 major ticks on [0, 1], labelled with two decimals
img__yaxis_title = "Y-Axis"
img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(0, 1, 6)
img__yaxis_ticktext = [f"{i:.2f}" for i in img__yaxis_tickvals]

# Legend text size (read by fix_layout)
img__legend_font_size = 16

# Width used for the emphasised lines and box outlines
img__line_width = 5

# Colour palette as "#RRGGBB" hex strings
img__color_blue = "#22409C" # Marian Blue
img__color_green = "#009344" # Shamrock Green
img__color_orange = "#C03221" # Engineering Orange
img__color_brown = "#393424" # Drab Dark Brown
img__color_pink = "#F283B6" # Persian Pink
img__color_orange2 = "#FFA500" # Orange (Web Color)
# Trace colours, indexed by the position of the model in the plotting loops
colors = [img__color_blue, img__color_green, img__color_orange, img__color_orange2]

# Helper function to add an alpha and convert the color to a string for plotly
def with_alpha(color: str, alpha: float) -> str:
    """Return ``color`` as an ``rgba(r, g, b, a)`` string with the given opacity.

    Args:
        color: Hex colour whose red, green and blue channels are the two-digit
            hex pairs at positions 1-2, 3-4 and 5-6 (i.e. ``"#RRGGBB"``).
        alpha: Opacity, from 0 to 1 inclusive.

    Returns:
        A string such as ``"rgba(34, 64, 156, 0.5)"``.

    Raises:
        ValueError: If ``alpha`` is outside ``[0, 1]`` or a channel is not valid hex.
    """
    # Explicit check instead of `assert`, which is stripped when Python runs with -O
    if not 0 <= alpha <= 1:
        raise ValueError(f"alpha must be within [0, 1], got {alpha!r}")
    red, green, blue = (int(color[i:i + 2], 16) for i in (1, 3, 5))
    # Format each component explicitly instead of relying on tuple repr, which
    # renders NumPy scalars as e.g. "np.float64(0.5)" on NumPy >= 2.
    return f"rgba({red}, {green}, {blue}, {alpha})"

def fix_layout(a_fig: go.Figure) -> None:
    """Apply the notebook-wide title, canvas, font, margin and legend styling.

    The figure is modified in place. Every value is read from the module-level
    ``img__*`` variables at call time, so set them before calling.

    Args:
        a_fig: Figure to style.
    """
    title_style = dict(
        text=img__title,
        x=0.5,
        xanchor='center',
        y=0.98,
        yanchor='top',
        font=dict(
            family=img__font_family,
            size=img__title_font_size,
            color=img__font_color,
        ),
    )
    body_font = dict(
        family=img__font_family,
        color=img__font_color,
        size=img__font_size,
    )
    # Tight margins everywhere except the top, which leaves room for the title
    margins = dict(l=10, r=10, b=10, t=100, pad=0)
    legend_style = dict(
        orientation="v",
        x=0.03,
        xanchor="left",
        y=0.9,
        yanchor="top",
        itemsizing='trace',  # To ensure items in legend keep the same size
        traceorder="normal",
        bgcolor="White",
        bordercolor="Black",
        borderwidth=1,
        groupclick="toggleitem",
        itemclick="toggleothers",
        itemdoubleclick="toggle",
        tracegroupgap=100,
        font_size=img__legend_font_size,
    )

    a_fig.update_layout(
        title=title_style,
        plot_bgcolor='white',
        paper_bgcolor='white',
        width=img__width,
        height=img__height,
        font=body_font,
        margin=margins,
        showlegend=True,
        legend=legend_style,
    )

def fix_xaxis(a_fig: go.Figure) -> None:
    """Apply the shared x-axis styling to every x-axis of ``a_fig`` (in place).

    Range, tick positions, tick labels, minor ticks and title are read from the
    module-level ``img__xaxis_*`` variables at call time. Only the x-axes of the
    bottom subplot row receive the axis title; all others get no title.

    Args:
        a_fig: Figure whose layout holds the x-axes to style.
    """
    # With make_subplots' default top-left start cell, axes are numbered row by
    # row ('xaxis', 'xaxis2', ...), so the bottom row holds the last img__cols.
    last_axis = img__rows * img__cols
    titled_axes = {
        'xaxis' if n == 1 else f'xaxis{n}'
        for n in range(last_axis - img__cols + 1, last_axis + 1)
    }

    # Snapshot the keys first so the layout is not updated while it is iterated
    axis_keys = [key for key in a_fig.layout if key.startswith('xaxis')]
    for key in axis_keys:
        xaxis_properties = dict(
            title=img__xaxis_title if key in titled_axes else None,
            range=img__xaxis_range,
            automargin=True,
            showline=True,
            showgrid=True,
            linewidth=img__axis_width,
            linecolor=img__line_color,
            zerolinecolor=img__zline_color,
            gridcolor=img__glines_color,
            # Tick positions are the numeric values; the labels go in ticktext
            tickvals=img__xaxis_tickvals,
            ticktext=img__xaxis_ticktext,
            tickmode='array',
            ticks='outside',
            minor=dict(
                gridcolor=img__glines_color,
                tickvals=img__xaxis_subticks
            )
        )

        a_fig.update_layout({key: xaxis_properties})

def fix_yaxis(a_fig: go.Figure) -> None:
    """Apply the shared y-axis styling to every y-axis of ``a_fig`` (in place).

    Range, tick positions, tick labels, minor ticks and title are read from the
    module-level ``img__yaxis_*`` variables at call time. Only the y-axes of the
    first subplot column receive the axis title; all others get no title.

    Args:
        a_fig: Figure whose layout holds the y-axes to style.
    """
    # With make_subplots' default top-left start cell, axes are numbered row by
    # row ('yaxis', 'yaxis2', ...), so the first column is every img__cols-th axis.
    titled_axes = {
        'yaxis' if n == 1 else f'yaxis{n}'
        for n in range(1, img__rows * img__cols + 1, img__cols)
    }

    # Snapshot the keys first so the layout is not updated while it is iterated
    axis_keys = [key for key in a_fig.layout if key.startswith('yaxis')]
    for key in axis_keys:
        yaxis_properties = dict(
            title=img__yaxis_title if key in titled_axes else None,
            range=img__yaxis_range,
            automargin=True,
            showline=True,
            showgrid=True,
            linewidth=img__axis_width,
            linecolor=img__line_color,
            zerolinecolor=img__zline_color,
            gridcolor=img__glines_color,
            # Tick positions are the numeric values; the labels go in ticktext
            tickvals=img__yaxis_tickvals,
            ticktext=img__yaxis_ticktext,
            tickmode='array',
            ticks='outside',
            minor=dict(
                gridcolor=img__glines_color,
                tickvals=img__yaxis_subticks
            )
        )

        a_fig.update_layout({key: yaxis_properties})


**Figure 1** Flowchart of the Water-Futures ensemble forecasting framework.

**Figure 2** Characteristics of the weekly demand pattern of the DMAs (from panel (a) DMA A, to panel (j) DMA J)

*We plot a subfigure for each DMA. The x-axis shows the hour of the week (from 0 to 168). The y-axis shows the signal scaled by the mean. The signal is drawn as the median together with the 25th and 75th percentiles, with a fill in between.*

In [None]:
img__title = "Weekly Demand Patterns of the DMAs"

img__xaxis_title = "Hour of the week [h]"
img__yaxis_title = "Mean-scaled flowrate [-]"

img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(0, 2, 5)
img__yaxis_ticktext = [f"{i:.1f}" for i in img__yaxis_tickvals]

fig = make_subplots(**subplot_settings)

# Quantities shared by every panel
xvalues = np.arange(H_PER_WEEK__k)
week_hour_keys = [wfe.demand.index.dayofweek, wfe.demand.index.hour]
band_color = with_alpha(img__color_blue, 0.5)

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1

    # Group once per DMA by (day of week, hour) and reuse it for all three statistics
    by_week_hour = wfe.demand[dma].groupby(week_hour_keys)

    upper = by_week_hour.quantile(0.75).values
    # NOTE(review): the scale is the mean of the 75th-percentile profile, not the
    # mean of the demand itself — confirm this is the intended "mean-scaled" signal.
    yscale = upper.mean()

    # Order matters: fill='tonexty' shades towards the trace added just before,
    # so the bands are 75th -> median and median -> 25th.
    bands = [
        ("75th percentile", "upper", upper / yscale, 1, None),
        ("Median", "median", by_week_hour.median().values / yscale, img__line_width, 'tonexty'),
        ("25th percentile", "lower", by_week_hour.quantile(0.25).values / yscale, 1, 'tonexty'),
    ]
    for band_name, band_group, yvalues, band_width, band_fill in bands:
        fig.add_trace(go.Scatter(
                x=xvalues,
                y=yvalues,
                mode='lines',
                line=dict(color=img__color_blue, width=band_width),
                fill=band_fill,
                fillcolor=band_color if band_fill else None,
                name=band_name,
                showlegend=i == 0,  # one legend entry per statistic, taken from the first panel
                legendgroup=band_group
            ),
            row=row, col=col
        )

fix_layout(fig)
fix_xaxis(fig)
fix_yaxis(fig)

fig.show()

In [None]:
# Prepare the results for the selected models
# `res` maps 'demand' to the observed demand and each model name to a dict with
# its forecast and the derived metrics ('error', 'abs_error', 'abs_pct_error',
# 'crps', 'r2') that the plotting cells read.
res = {
    'demand': wfe.demand
}

selected_models = ['PrevWeek', 'AutoRollingAverage'] #, 'LGBMrobust', 'WaveNet', 'XGBMsimple']

from scores.probability import crps_for_ensemble
from scores.continuous.correlation import pearsonr

demand = res['demand']
for model in selected_models:
    forecast = wfe.models[model]['forecasts']

    # Compute the error once and derive the other error metrics from it
    error = forecast - demand
    abs_error = error.abs()

    # One squared-Pearson frame per DMA (kept per seed), joined column-wise in a
    # single concat instead of growing a DataFrame inside the loop.
    r2_per_dma = [
        (pearsonr(forecast[dma].to_xarray(), demand[dma].to_xarray(), preserve_dims=['seed'])**2).to_dataframe()
        for dma in forecast.columns
    ]

    res[model] = {
        'forecast': forecast,
        'error': error,
        'abs_error': abs_error,
        # NOTE(review): timestamps with zero demand give inf/NaN here — confirm none occur
        'abs_pct_error': abs_error / demand,
        'crps': crps_for_ensemble(forecast.to_xarray(), demand.to_xarray(), ensemble_member_dim='seed', preserve_dims='date').to_dataframe(),
        # pd.concat rejects an empty list, so fall back to an empty frame
        'r2': pd.concat(r2_per_dma, axis=1) if r2_per_dma else pd.DataFrame(),
    }

    print(res[model]['r2'].mean(axis=0))

In [None]:
# Box plot of the per-seed R2 of each selected model, one box per (DMA, model)
fig = go.Figure()

# Take the DMA list from an explicitly named model rather than from whatever
# `model` was left holding by a loop in a previously executed cell.
r2_dmas = res[selected_models[-1]]['r2'].columns
for dma in r2_dmas:
    for i, model in enumerate(selected_models):
        fig.add_trace(go.Box(
            y=res[model]['r2'][dma],
            name=model+dma,
            boxmean='sd',
            boxpoints='all',
            pointpos=0,
            marker=dict(
                color=colors[i % len(colors)],  # wrap around if there are more models than colours
                size=4,
                line=dict(width=1)
            ),
            line_width=img__line_width,
            showlegend=False
        ))

fig.update_layout(
    xaxis_title="DMA",
    yaxis_title="R2",
    title=dict(
        text="R2 of the models",
        xanchor='center',
        x=0.5,
        yanchor='top',
        y=0.98,
        font = dict(
            family=img__font_family,
            size=img__title_font_size,
            color=img__font_color
        )
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(
        family=img__font_family,
        color=img__font_color,
        size=img__font_size
    ),

    margin=dict(
        l=10,
        r=10,
        b=10,
        t=100,
        pad=0
    ),

    showlegend=False,
    yaxis=dict(
        range=[0, 1],
        automargin=True,
        showline=True,
        showgrid=True,
        linewidth=img__axis_width,
        linecolor=img__line_color,
        zerolinecolor=img__zline_color,
        gridcolor=img__glines_color
    ))
fig.show()

In [None]:
#plot-1
#Error plot
fig = make_subplots(**subplot_settings)

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1
    for j, model in enumerate(selected_models):

        # Mean error per "date", scaled by the DMA's "h_mean" characteristic
        error_df = res[model]["error"][dma].groupby("date").mean() / dmas_characteristics.loc[dma, "h_mean"]

        fig.add_trace(go.Scatter(x=error_df.index, y=error_df,
                                 name=f'{model}', mode='lines',
                                 # wrap around if there are more models than colours
                                 line=dict(color=colors[j % len(colors)]),
                                 # one legend group per model; legend entry only from the first panel
                                 legendgroup=model, showlegend=i == 0),
                      row=row, col=col)

img__title = "Scaled model's error in testing by DMA"
fix_layout(fig)

img__yaxis_title = "Scaled error magnitude [-]"
img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(-1, 1, 5)
img__yaxis_ticktext = [f"{i:.1f}" for i in img__yaxis_tickvals]
fix_yaxis(fig)

# The x-axis holds the "date" index, so only the bottom-row titles are set here
img__xaxis_title = "Time [h]"
fig.update_layout(xaxis9_title=img__xaxis_title, xaxis10_title=img__xaxis_title)

fig.show()

In [None]:
#plot-2
#absolute error
fig = make_subplots(**subplot_settings)

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1
    for j, model in enumerate(selected_models):

        # Mean absolute error per "date", scaled by the DMA's "h_mean" characteristic
        abs_error_df = res[model]["abs_error"][dma].groupby("date").mean() / dmas_characteristics.loc[dma, "h_mean"]

        fig.add_trace(go.Scatter(x=abs_error_df.index, y=abs_error_df,
                                 name=f'{model}', mode='lines',
                                 # wrap around if there are more models than colours
                                 line=dict(color=colors[j % len(colors)]),
                                 # one legend group per model; legend entry only from the first panel
                                 legendgroup=model, showlegend=i == 0),
                      row=row, col=col)

img__title = "Scaled model's absolute error in testing by DMA"
fix_layout(fig)

img__yaxis_title = "Scaled AE magnitude [-]"
img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(0, 1, 5)
img__yaxis_ticktext = [f"{i:.2f}" for i in img__yaxis_tickvals]
fix_yaxis(fig)

# The x-axis holds the "date" index, so only the bottom-row titles are set here
img__xaxis_title = "Time [h]"
fig.update_layout(xaxis9_title=img__xaxis_title, xaxis10_title=img__xaxis_title)

fig.show()

In [None]:
#plot-3
#absolute percentage error
fig = make_subplots(**subplot_settings)

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1
    for j, model in enumerate(selected_models):

        # Mean absolute percentage error per "date" (already relative, so no scaling)
        abs_pct_error_df = res[model]["abs_pct_error"][dma].groupby("date").mean()

        fig.add_trace(go.Scatter(x=abs_pct_error_df.index, y=abs_pct_error_df,
                                 name=f'{model}', mode='lines',
                                 # wrap around if there are more models than colours
                                 line=dict(color=colors[j % len(colors)]),
                                 # one legend group per model; legend entry only from the first panel
                                 legendgroup=model, showlegend=i == 0),
                      row=row, col=col)

img__title = "Model's absolute percentage error in testing by DMA"
fix_layout(fig)

img__yaxis_title = "APE magnitude [-]"
img__yaxis_range = [0, 1]
img__yaxis_ticktext = list(np.linspace(0, 1, 5))
img__yaxis_tickvals = list(np.linspace(0, 1, 5))
img__yaxis_subticks = list(np.linspace(0, 1, 9))
fix_yaxis(fig)

# The x-axis holds the "date" index, so only the bottom-row titles are set here
img__xaxis_title = "Time [h]"
fig.update_layout(xaxis9_title=img__xaxis_title, xaxis10_title=img__xaxis_title)

fig.show()

In [None]:
#plot-4
#Mean absolute error by day
fig = make_subplots(**subplot_settings)

# The per-"date" mean does not depend on the DMA, so compute it once per model
# instead of once per (DMA, model) pair.
mean_abs_error = {
    name: res[name]["abs_error"].groupby("date").mean()
    for name in selected_models
}

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1
    for j, model in enumerate(selected_models):

        # Scale this DMA's column by its "h_mean", then average per weekday (0..6)
        abs_error_df = mean_abs_error[model][dma] / dmas_characteristics.loc[dma, "h_mean"]
        abs_error_df = abs_error_df.groupby(abs_error_df.index.weekday).mean()

        fig.add_trace(go.Scatter(x=abs_error_df.index, y=abs_error_df,
                                 name=f'{model}', mode='lines',
                                 # wrap around if there are more models than colours
                                 line=dict(color=colors[j % len(colors)]),
                                 # one legend group per model; legend entry only from the first panel
                                 legendgroup=model, showlegend=i == 0),
                      row=row, col=col)

img__title = "Scaled model's Mean Absolute Error in testing per weekday by DMA"
fix_layout(fig)

img__yaxis_title = "Scaled MAE magnitude [-]"
img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(0, 0.2, 6)
img__yaxis_ticktext = [f"{i:.2f}" for i in img__yaxis_tickvals]
fix_yaxis(fig)

img__xaxis_title = "Day of the week"
# One major tick per weekday index 0..6
img__xaxis_range, img__xaxis_tickvals, img__xaxis_subticks = get_axis_custom_ticks(0, 6, 7)
img__xaxis_ticktext = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# No minor ticks between the weekdays
img__xaxis_subticks = None
fix_xaxis(fig)

fig.show()

In [None]:
#plot-5
#plot MEAN ABSOLUTE PERCENTAGE ERROR PER HOUR OF THE WEEK
fig = make_subplots(**subplot_settings)

# The hour-of-week profile is computed on the whole frame and does not depend on
# the DMA, so build it once per model instead of once per (DMA, model) pair.
mape_by_week_hour = {}
for model in selected_models:
    MAPE_df = res[model]["abs_pct_error"].groupby("date").mean()
    hour_of_week = MAPE_df.index.weekday * H_PER_DAY__k + MAPE_df.index.hour
    mape_by_week_hour[model] = MAPE_df.groupby(hour_of_week).mean().rename_axis('HourOfWeek').reset_index()

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1
    for j, model in enumerate(selected_models):

        MAPE_df_hourly = mape_by_week_hour[model]

        fig.add_trace(go.Scatter(x=MAPE_df_hourly['HourOfWeek'], y=MAPE_df_hourly[dma],
                                 name=f'{model}', mode='lines',
                                 # wrap around if there are more models than colours
                                 line=dict(color=colors[j % len(colors)]),
                                 # one legend group per model; legend entry only from the first panel
                                 legendgroup=model, showlegend=i == 0),
                      row=row, col=col)

img__title = "Model's Mean Absolute Percentage Error in testing per weekhour by DMA"
fix_layout(fig)

img__yaxis_title = "MAPE magnitude [-]"
img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(0, 0.4, 6)
img__yaxis_ticktext = [f"{i:.1f}" for i in img__yaxis_tickvals]
fix_yaxis(fig)

img__xaxis_title = "Hour of the week"
# 8 ticks over 0..H_PER_WEEK__k put one tick at every day boundary (24 h apart);
# 7 ticks would be 28 h apart and drift away from the weekday labels.
img__xaxis_range, img__xaxis_tickvals, img__xaxis_subticks = get_axis_custom_ticks(0, H_PER_WEEK__k, 8)
img__xaxis_ticktext = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
fix_xaxis(fig)

fig.show()

In [None]:
#plot-6
#plot CRPS
fig = make_subplots(**subplot_settings)

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1
    for j, model in enumerate(selected_models):

        # CRPS of this DMA scaled by its "h_mean" characteristic
        crps_df = res[model]["crps"][dma]/dmas_characteristics.loc[dma, "h_mean"]

        fig.add_trace(go.Scatter(x=crps_df.index, y=crps_df,
                                 name=f'{model}', mode='lines',
                                 # wrap around if there are more models than colours
                                 line=dict(color=colors[j % len(colors)]),
                                 # one legend group per model; legend entry only from the first panel
                                 legendgroup=model, showlegend=i == 0),
                      row=row, col=col)

img__title = "Scaled model's CRPS in testing by DMA"
fix_layout(fig)

img__yaxis_title = "CRPS magnitude [-]"
img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(0, 2, 6)
img__yaxis_ticktext = [f"{i:.1f}" for i in img__yaxis_tickvals]
fix_yaxis(fig)

# The x-axis holds the CRPS frame's own index, so only the bottom-row titles are set here
img__xaxis_title = "Time [h]"
fig.update_layout(xaxis9_title=img__xaxis_title, xaxis10_title=img__xaxis_title)

fig.show()

In [None]:
#plot-7
#plot MEAN CRPS per week day
fig = make_subplots(**subplot_settings)

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1
    for j, model in enumerate(selected_models):

        # CRPS of this DMA scaled by its "h_mean", then averaged per weekday (0..6)
        crps_df = res[model]["crps"][dma] / dmas_characteristics.loc[dma, "h_mean"]
        crps_df_day = crps_df.groupby(crps_df.index.weekday).mean()

        fig.add_trace(go.Scatter(x=crps_df_day.index, y=crps_df_day,
                                 name=f'{model}', mode='lines',
                                 # wrap around if there are more models than colours
                                 line=dict(color=colors[j % len(colors)]),
                                 # one legend group per model; legend entry only from the first panel
                                 legendgroup=model, showlegend=i == 0),
                      row=row, col=col)

img__title = "Scaled model's CRPS in testing per weekday by DMA"
fix_layout(fig)

img__yaxis_title = "CRPS magnitude [-]"
img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(0, 0.2, 6)
img__yaxis_ticktext = [f"{i:.2f}" for i in img__yaxis_tickvals]
fix_yaxis(fig)

img__xaxis_title = "Day of the week"
# One major tick per weekday index 0..6
img__xaxis_range, img__xaxis_tickvals, img__xaxis_subticks = get_axis_custom_ticks(0, 6, 7)
img__xaxis_ticktext = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
fix_xaxis(fig)

fig.show()

In [None]:
#plot-8
#plot mean CRPS per hour
fig = make_subplots(**subplot_settings)

# The per-"date" mean does not depend on the DMA, so compute it once per model
# instead of once per (DMA, model) pair.
mean_crps = {
    name: res[name]["crps"].groupby("date").mean()
    for name in selected_models
}

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1
    for j, model in enumerate(selected_models):

        # Scale this DMA's column by its "h_mean", then average per hour of the week
        crps_df = mean_crps[model][dma] / dmas_characteristics.loc[dma, "h_mean"]
        hour_of_week = crps_df.index.weekday * H_PER_DAY__k + crps_df.index.hour
        crps_df_hour = crps_df.groupby(hour_of_week).mean().rename_axis('HourOfWeek')

        fig.add_trace(go.Scatter(x=crps_df_hour.index, y=crps_df_hour,
                                 name=f'{model}', mode='lines',
                                 # wrap around if there are more models than colours
                                 line=dict(color=colors[j % len(colors)]),
                                 # one legend group per model; legend entry only from the first panel
                                 legendgroup=model, showlegend=i == 0),
                      row=row, col=col)

img__title = "Model's CRPS in testing per weekhour by DMA"
fix_layout(fig)

img__yaxis_title = "CRPS magnitude [-]"
img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(0, 0.6, 6)
img__yaxis_ticktext = [f"{i:.1f}" for i in img__yaxis_tickvals]
fix_yaxis(fig)

img__xaxis_title = "Hour of the week"
# 8 ticks over 0..H_PER_WEEK__k put one tick at every day boundary (24 h apart);
# 7 ticks would be 28 h apart and drift away from the weekday labels.
img__xaxis_range, img__xaxis_tickvals, img__xaxis_subticks = get_axis_custom_ticks(0, H_PER_WEEK__k, 8)
img__xaxis_ticktext = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
fix_xaxis(fig)

fig.show()

In [None]:
#plot-9
#plot Violin MAPE plots average day
fig = make_subplots(**subplot_settings)

# The per-"date" mean does not depend on the DMA, so compute it once per model
# instead of once per (DMA, model) pair.
mape_by_model = {
    name: res[name]["abs_pct_error"].groupby("date").mean()
    for name in selected_models
}

for i, dma in enumerate(dmas_characteristics.index):
    # Panels are filled row by row
    row = i // img__cols + 1
    col = i % img__cols + 1
    for j, model in enumerate(selected_models):

        MAPE_df = mape_by_model[model]

        # One list of values per weekday group ...
        MAPE_df_day = MAPE_df[dma].groupby(MAPE_df.index.weekday).apply(list)

        # ... laid out as one column per group (shorter groups are NaN-padded) ...
        MAPE_df_day_fixed = pd.DataFrame(MAPE_df_day.tolist()).T

        # ... and flattened to long form: one (weekday, MAPE) row per value
        df_melted = MAPE_df_day_fixed.melt(var_name='weekday', value_name='MAPE')

        fig.add_trace(go.Violin(x=df_melted["weekday"], y=df_melted["MAPE"],
                                name=f'{model}',
                                # wrap around if there are more models than colours
                                line=dict(color=colors[j % len(colors)]),
                                # one legend group per model; legend entry only from the first panel
                                legendgroup=model, showlegend=i == 0, width=0.7),
                      row=row, col=col)

img__title = "Violin plots of MAPE in testing per weekday by DMA"
fix_layout(fig)

img__yaxis_title = "MAPE magnitude [-]"
img__yaxis_range, img__yaxis_tickvals, img__yaxis_subticks = get_axis_custom_ticks(0, 1, 6)
img__yaxis_ticktext = [f"{i:.2f}" for i in img__yaxis_tickvals]
fix_yaxis(fig)

img__xaxis_title = "Day of the week"
# One major tick per weekday index 0..6
img__xaxis_range, img__xaxis_tickvals, img__xaxis_subticks = get_axis_custom_ticks(0, 6, 7)
img__xaxis_ticktext = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
fix_xaxis(fig)
fig.show()