# Figures for the paper ' '


In [None]:
# Preparation of the notebook environment.
# (Import libraries, upload the data, define the auxiliary functions, and set the variables.)
import sys
import os
os.chdir('..') # To work in the root directory
import pandas as pd
import numpy as np

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from scores.probability import crps_for_ensemble

from preprocessing.advanced_transforms import load_characteristics # For the DMA characteristics
from eval.evaluator import WaterFuturesEvaluator # For the results and forecasts
# Table of DMA characteristics; its index is iterated below to address each DMA.
dmas_characteristics = load_characteristics()
wfe = WaterFuturesEvaluator() # This loads all results in the results folder
wfe.next_iter() # This loads the input data in the first iteration
# Number of samples per day and per week (assumes hourly data — TODO confirm)
DAY_LEN = 24
WEEK_LEN = 7 * DAY_LEN

# Define the figure layout and the color palette

# Size of the image should be (height) 6.5 cm x (width) 12 cm (2.56 x 4.72 in)
# fig_size_in is stored as (width, height) in inches.
fig_size_in = (4.72, 2.56)
dpi_mac = 227
dpi = dpi_mac*0.75

# Overwrite these variables to change the figure appearance.
# fix_layout() reads them at call time, so set them before calling it.
img__font_family = "Lato"
img__font_color = "black"
img__font_size = 14
img__title = "Title"
img__title_font_size = 22
img__xaxis_title = "X-Axis"
img__xaxis_range = [0, 1]
img__line_color = "grey"
img__zline_color = "black"
img__glines_color = "lightgrey"
img__axis_width = 1
img__yaxis_title = "Y-Axis"
img__yaxis_range = [0, 1]
img__legend_font_size = 16

img__color_blue = "#22409C" # Marian Blue
img__color_green = "#009344" # Shamrock Green
img__color_orange = "#C03221" # Engineering Orange
img__color_brown = "#393424" # Drab Dark Brown
img__color_pink = "#F283B6" # Persian Pink
img__line_width = 5
# Per-model trace colors, indexed by the position of the model in the model lists below.
# NOTE(review): the third entry is the literal "#FFA500", not img__color_orange — confirm this is intended.
colors = [img__color_blue, img__color_green, "#FFA500",img__color_pink]

# Helper function to add an alpha and convert the color to a string for plotly
def with_alpha(color: str, alpha: float) -> str:
    """Return ``color`` with an opacity as an ``rgba(r, g, b, a)`` string for plotly.

    Args:
        color: Hex colour whose red, green and blue bytes are at string
            positions 1-2, 3-4 and 5-6 (i.e. ``#RRGGBB``).
        alpha: Opacity between 0 and 1, inclusive.

    Returns:
        A string such as ``"rgba(34, 64, 156, 0.5)"``.

    Raises:
        ValueError: If ``alpha`` is outside ``[0, 1]`` or the colour bytes are
            not valid hexadecimal.
    """
    # Raise explicitly: an ``assert`` would be stripped when running with -O.
    if not 0 <= alpha <= 1:
        raise ValueError(f"alpha must be within [0, 1], got {alpha!r}")
    red, green, blue = (int(color[i:i + 2], 16) for i in (1, 3, 5))
    # Format each component explicitly instead of relying on the tuple repr:
    # the repr of a NumPy scalar (e.g. ``np.float64(0.5)``) is not a valid
    # CSS number, whereas its plain formatting is.
    return f"rgba({red}, {green}, {blue}, {alpha})"

# Quick visual check of the helper: prints the blue palette colour at 50 % opacity.
print(with_alpha(img__color_blue, 0.5))

def fix_layout(a_fig: go.Figure) -> None:
    """Apply the common figure styling to ``a_fig`` in place.

    Every styling value is read from the module-level ``img__*`` variables,
    ``fig_size_in`` and ``dpi`` at call time, so overwrite those variables
    before calling this function.

    Args:
        a_fig: The plotly figure whose layout is updated.
    """

    def _axis_style(axis_title, axis_range):
        # Both axes share every setting except their title and range.
        return dict(
            title=axis_title,
            range=axis_range,
            automargin=True,
            showline=True,
            showgrid=True,
            linewidth=img__axis_width,
            linecolor=img__line_color,
            zerolinecolor=img__zline_color,
            gridcolor=img__glines_color,
        )

    width_in, height_in = fig_size_in

    title_style = dict(
        text=img__title,
        xanchor='center',
        x=0.5,
        yanchor='top',
        y=0.98,
        font=dict(
            family=img__font_family,
            size=img__title_font_size,
            color=img__font_color,
        ),
    )

    base_font = dict(
        family=img__font_family,
        color=img__font_color,
        size=img__font_size,
    )

    legend_style = dict(
        orientation="v",
        xanchor="left",
        x=0.03,
        yanchor="top",
        y=0.9,
        itemsizing='trace',  # Keep legend items the same size as their traces
        traceorder="normal",
        bgcolor="White",
        bordercolor="Black",
        borderwidth=1,
        groupclick="toggleitem",
        itemclick="toggleothers",
        itemdoubleclick="toggle",
        tracegroupgap=100,
        font_size=img__legend_font_size,
    )

    a_fig.update_layout(
        title=title_style,
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=_axis_style(img__xaxis_title, img__xaxis_range),
        yaxis=_axis_style(img__yaxis_title, img__yaxis_range),
        # Figure size in pixels: inches times the target dpi.
        width=width_in * dpi,
        height=height_in * dpi,
        font=base_font,
        margin=dict(l=10, r=10, b=10, t=50, pad=0),
        showlegend=True,
        legend=legend_style,
    )


**Figure 1** Flowchart of the Water-Futures ensemble forecasting framework.

**Figure 2** Characteristics of the weekly demand pattern of the DMAs (from panel (a) DMA A, to panel (j) DMA J)

*We plot a subfigure for each DMA. The x-axis shows the hour of the week (from 0 to 168) and the y-axis shows the flow scaled by its mean. The plotted signal is the median, with a filled band between the 25th and 75th percentiles.*

In [None]:
# Figure 2: weekly demand pattern of every DMA (median with a 25th-75th percentile band),
# one subplot per DMA on a 5 x 2 grid.
img__title = "Weekly Demand Patterns of the DMAs"
img__subtitles = dmas_characteristics.index

img__xaxis_title = "Time of the week [h]"
img__yaxis_title = "Mean-scaled flow [-]"

img__xaxis_range = [0, WEEK_LEN]
img__xaxis_ticks = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
img__xaxis_subticks = list(range(0, WEEK_LEN + 1, 8))
img__yaxis_range = [0, 1.5]

fig_size = (6, 12)
fig = make_subplots(rows=5, cols=2,
                    shared_xaxes=True, shared_yaxes=True,
                    horizontal_spacing=0.1, vertical_spacing=0.1,
                    subplot_titles=img__subtitles,
                    x_title=img__xaxis_title,
                    y_title=img__yaxis_title)

hours_of_week = np.arange(WEEK_LEN)
band_fill = with_alpha(img__color_blue, 0.5)
thin_blue_line = dict(color=img__color_blue, width=1)

# NOTE(review): `dmas_h_q` is not defined in the visible part of this notebook;
# it must already exist (presumably the hourly flow per DMA with a DatetimeIndex — confirm).
for i, dma in enumerate(dmas_characteristics.index):
    row = i // 2 + 1
    col = i % 2 + 1

    # Group once per DMA by (day of week, hour of day) and reuse the grouping
    # for the median and both quantiles.
    by_week_hour = dmas_h_q[dma].groupby([dmas_h_q.index.dayofweek, dmas_h_q.index.hour])
    yvalues = by_week_hour.median().values
    yscale = yvalues.mean()
    yvalues = yvalues / yscale
    upper = by_week_hour.quantile(0.75).values / yscale
    lower = by_week_hour.quantile(0.25).values / yscale

    # Median line
    fig.add_trace(go.Scatter(
            x=hours_of_week,
            y=yvalues,
            mode='lines',
            line=dict(color=img__color_blue, width=img__line_width)
        ),
        row=row, col=col
    )
    # 75th percentile, filled down to the median ('tonexty' fills to the previous trace)
    fig.add_trace(go.Scatter(
            x=hours_of_week,
            y=upper,
            mode='lines',
            line=thin_blue_line,
            fill='tonexty',
            fillcolor=band_fill
        ),
        row=row, col=col
    )
    # Zero-width copy of the median so the next fill starts from the median again
    # instead of from the 75th percentile
    fig.add_trace(go.Scatter(
            x=hours_of_week,
            y=yvalues,
            mode='lines',
            line=dict(color=img__color_blue, width=0)
        ),
        row=row, col=col
    )
    # 25th percentile, filled up to the median
    fig.add_trace(go.Scatter(
            x=hours_of_week,
            y=lower,
            mode='lines',
            line=thin_blue_line,
            fill='tonexty',
            fillcolor=band_fill
        ),
        row=row, col=col
    )

fig.update_layout(
    width=1600,   # Increase the width as needed
    height=1200    # Increase the height as needed
)

fig.show()

In [None]:
# Raw forecast error (forecast - observed demand) of the first seed of each showcased model,
# one subplot per DMA.
res = wfe.results
# Models to showcase
models_needed = ['AutoRollingAverage','LGBMsimple','WaveNet', 'XGBMsimple']

fig = make_subplots(rows=5, cols=2,
                    shared_xaxes=True, shared_yaxes=True,
                    horizontal_spacing=0.1, vertical_spacing=0.1,
                    subplot_titles=img__subtitles,
                    x_title=img__xaxis_title,
                    y_title=img__yaxis_title)
fig.update_layout(
    width=1600,   # Increase the width as needed
    height=1200    # Increase the height as needed
)


for i, dma in enumerate(dmas_characteristics.index):
    row = i // 2 + 1
    col = i % 2 + 1
    demand_ground_truth = wfe.demand[dma]

    for j, model in enumerate(models_needed):
        demand_predict = res[model]["iter_1"]["train"]["seed_0"]["forecast"][dma]
        # pandas aligns the two series on the union of their indexes, so the
        # result can be longer than the demand series.
        error = demand_predict - demand_ground_truth
        # Use the error's own index for x so that x and y always have the same length.
        fig.add_trace(go.Scatter(x=error.index, y=error,
                                 name=f'Error of {model}', mode='lines', line=dict(color=colors[j]),
                                 legendgroup='Observations', showlegend=True
                                ), row=row, col=col)

fig.show()

In [None]:
# Collect, for every selected model, the forecasts of all seeds and the derived
# error metrics. Each entry of `res[model]` is a DataFrame with one column per DMA.
res = {
    'demand': wfe.demand
}

selected_models = ['AutoRollingAverage', 'LGBMrobust', 'WaveNet', 'XGBMsimple']

for model in selected_models:
    # Stack the per-seed forecasts into a single (Seed, Date) multi-index DataFrame.
    seed_runs = wfe.results[model]['iter_1']['train']
    fcst_list = [seed_runs[seed]['forecast'] for seed in seed_runs]
    forecast = pd.concat(fcst_list,
                         keys=range(len(fcst_list)),
                         names=['Seed', 'Date'])

    # Compute the signed error once and derive the other point metrics from it.
    error = forecast - res['demand']
    abs_error = error.abs()

    res[model] = {
        'forecast': forecast,
        'error': error,
        'abs_error': abs_error,
        'abs_pct_error': abs_error / res['demand'],
        # CRPS treats the seeds as ensemble members and keeps one value per Date.
        'crps': crps_for_ensemble(forecast.to_xarray(), res['demand'].to_xarray(),
                                  ensemble_member_dim='Seed', preserve_dims='Date').to_dataframe(),
    }
    

In [None]:
# Plot the Mean Absolute Error (MAE) over time: absolute error averaged over the seeds,
# one subplot per DMA and one line per model.

img__xaxis_title = "Time of the week [h]"
img__yaxis_title = "Mean Absolute Error [-]"

fig = make_subplots(rows=5, cols=2,
                    shared_xaxes=True, shared_yaxes=True,
                    horizontal_spacing=0.1, vertical_spacing=0.1,
                    subplot_titles=img__subtitles,
                    x_title=img__xaxis_title,
                    y_title=img__yaxis_title)
fig.update_layout(
    width=1600,   # Increase the width as needed
    height=1200    # Increase the height as needed
)

for i, dma in enumerate(dmas_characteristics.index):
    row = i // 2 + 1
    col = i % 2 + 1

    for j, model in enumerate(selected_models):

        error_mae = res[model]["abs_error"][dma].groupby("Date").mean()  #TODO divide by the average of the DMA

        # Use the series' own index for x instead of a variable left over from
        # another cell, so the cell runs on its own and x always matches y.
        fig.add_trace(go.Scatter(x=error_mae.index, y=error_mae,
                                 name=f'Error of {model}', mode='lines', line=dict(color=colors[j]),
                                 legendgroup='Observations', showlegend=True
                                ), row=row, col=col)

fig.show()

In [None]:
# Plot the Mean Absolute Percentage Error (MAPE) over time: absolute percentage error
# averaged over the seeds, one subplot per DMA and one line per model.

img__xaxis_title = "Time of the week [h]"
img__yaxis_title = "Mean Absolute Percentage Error [-]"
fig = make_subplots(rows=5, cols=2,
                    shared_xaxes=True, shared_yaxes=True,
                    horizontal_spacing=0.1, vertical_spacing=0.1,
                    subplot_titles=img__subtitles,
                    x_title=img__xaxis_title,
                    y_title=img__yaxis_title)
fig.update_layout(
    width=1600,   # Increase the width as needed
    height=1200    # Increase the height as needed
)

for i, dma in enumerate(dmas_characteristics.index):
    row = i // 2 + 1
    col = i % 2 + 1

    for j, model in enumerate(selected_models):

        error_mape = res[model]["abs_pct_error"][dma].groupby("Date").mean()   #TODO fix the path

        # Use the series' own index for x instead of a variable left over from
        # another cell, so the cell runs on its own and x always matches y.
        fig.add_trace(go.Scatter(x=error_mape.index, y=error_mape,
                                 name=f'Error of {model}', mode='lines', line=dict(color=colors[j]),
                                 legendgroup='Observations', showlegend=True
                                ), row=row, col=col)

fig.show()

In [None]:
# Plot the Mean Absolute Error (MAE) averaged per day of the week,
# one subplot per DMA and one line per model.

# The x values are pandas weekday indices (0 = Monday ... 6 = Sunday), not hours.
img__xaxis_title = "Day of the week (0 = Monday)"
img__yaxis_title = "Mean Absolute Error [-]"

fig = make_subplots(rows=5, cols=2,
                    shared_xaxes=True, shared_yaxes=True,
                    horizontal_spacing=0.1, vertical_spacing=0.1,
                    subplot_titles=img__subtitles,
                    x_title=img__xaxis_title,
                    y_title=img__yaxis_title)
fig.update_layout(
    width=1600,   # Increase the width as needed
    height=1200    # Increase the height as needed
)

# The aggregation does not depend on the DMA, so compute it once per model
# instead of once per (DMA, model) pair.
weekday_mae = {}
for model in selected_models:
    error_mae = res[model]["abs_error"].groupby("Date").mean()  # average over the seeds
    error_mae = error_mae.groupby(error_mae.index.weekday).mean()
    #TODO divide by the average of the DMA
    weekday_mae[model] = error_mae

for i, dma in enumerate(dmas_characteristics.index):
    row = i // 2 + 1
    col = i % 2 + 1

    for j, model in enumerate(selected_models):
        model_mae = weekday_mae[model]
        fig.add_trace(go.Scatter(x=model_mae.index, y=model_mae[dma],
                                 name=f'Error of {model}', mode='lines', line=dict(color=colors[j]),
                                 legendgroup='Observations', showlegend=True
                                ), row=row, col=col)

fig.show()

In [None]:
# Plot the Mean Absolute Error (MAE) averaged per hour of the week,
# one subplot per DMA and one line per model.

img__xaxis_title = "Time of the week [h]"
img__yaxis_title = "Mean Absolute Error [-]"


fig = make_subplots(rows=5, cols=2,
                    shared_xaxes=True, shared_yaxes=True,
                    horizontal_spacing=0.1, vertical_spacing=0.1,
                    subplot_titles=img__subtitles,
                    x_title=img__xaxis_title,
                    y_title=img__yaxis_title)
fig.update_layout(
    width=1600,   # Increase the width as needed
    height=1200    # Increase the height as needed
)

# The aggregation does not depend on the DMA, so compute it once per model.
hourly_mae = {}
for model in selected_models:
    error_mae = res[model]["abs_error"].groupby("Date").mean()  # average over the seeds
    # Hour of the week must be weekday * 24 + hour. A product such as
    # (weekday + 1) * (hour + 1) maps different hours to the same key
    # (e.g. Monday 01:00 and Tuesday 00:00 both give 2) and mixes them.
    hour_of_week = error_mae.index.weekday * DAY_LEN + error_mae.index.hour
    hourly_mae[model] = error_mae.groupby(hour_of_week).mean().rename_axis('HourOfWeek').reset_index()

for i, dma in enumerate(dmas_characteristics.index):
    row = i // 2 + 1
    col = i % 2 + 1

    for j, model in enumerate(selected_models):
        df1 = hourly_mae[model]
        fig.add_trace(go.Scatter(x=df1['HourOfWeek'], y=df1[dma],
                                 name=f'Error of {model}', mode='lines', line=dict(color=colors[j]),
                                 legendgroup='Observations', showlegend=True
                                ), row=row, col=col)

fig.show()

In [None]:
# Plot the Continuous Ranked Probability Score (CRPS) over time,
# one subplot per DMA and one line per model.

img__xaxis_title = "Time of the week [h]"
img__yaxis_title = "Continuous Ranked Probability Score [-]"
fig = make_subplots(rows=5, cols=2,
                    shared_xaxes=True, shared_yaxes=True,
                    horizontal_spacing=0.1, vertical_spacing=0.1,
                    subplot_titles=img__subtitles,
                    x_title=img__xaxis_title,
                    y_title=img__yaxis_title)
fig.update_layout(
    width=1600,   # Increase the width as needed
    height=1200    # Increase the height as needed
)

for i, dma in enumerate(dmas_characteristics.index):
    row = i // 2 + 1
    col = i % 2 + 1

    for j, model in enumerate(selected_models):

        crps_series = res[model]["crps"][dma]

        # Use the series' own index for x instead of a variable left over from
        # another cell, so the cell runs on its own and x always matches y.
        fig.add_trace(go.Scatter(x=crps_series.index, y=crps_series,
                                 name=f'CRPS of {model}', mode='lines', line=dict(color=colors[j]),
                                 legendgroup='Observations', showlegend=True
                                ), row=row, col=col)

fig.show()

In [None]:
# Plot the Continuous Ranked Probability Score (CRPS) averaged per day of the week,
# one subplot per DMA and one line per model.

# The x values are pandas weekday indices (0 = Monday ... 6 = Sunday), not hours.
img__xaxis_title = "Day of the week (0 = Monday)"
img__yaxis_title = "Continuous Ranked Probability Score [-]"
fig = make_subplots(rows=5, cols=2,
                    shared_xaxes=True, shared_yaxes=True,
                    horizontal_spacing=0.1, vertical_spacing=0.1,
                    subplot_titles=img__subtitles,
                    x_title=img__xaxis_title,
                    y_title=img__yaxis_title)
fig.update_layout(
    width=1600,   # Increase the width as needed
    height=1200    # Increase the height as needed
)

for i, dma in enumerate(dmas_characteristics.index):
    row = i // 2 + 1
    col = i % 2 + 1

    for j, model in enumerate(selected_models):

        crps_series = res[model]["crps"][dma]
        # Group by the weekday of the series' own index. Grouping by the index of
        # `error_mae` would depend on whichever cell last assigned that variable.
        crps_by_weekday = crps_series.groupby(crps_series.index.weekday).mean()

        fig.add_trace(go.Scatter(x=crps_by_weekday.index, y=crps_by_weekday,
                                 name=f'CRPS of {model}', mode='lines', line=dict(color=colors[j]),
                                 legendgroup='Observations', showlegend=True
                                ), row=row, col=col)

fig.show()

In [None]:
# Plot the Continuous Ranked Probability Score (CRPS) averaged per hour of the week,
# one subplot per DMA and one line per model.

img__xaxis_title = "Hour of the week [h]"
img__yaxis_title = "CRPS [L/s]"


fig = make_subplots(rows=5, cols=2,
                    shared_xaxes=True, shared_yaxes=True,
                    horizontal_spacing=0.1, vertical_spacing=0.1,
                    subplot_titles=img__subtitles,
                    x_title=img__xaxis_title,
                    y_title=img__yaxis_title)
fig.update_layout(
    width=1600,   # Increase the width as needed
    height=1200    # Increase the height as needed
)

# The aggregation does not depend on the DMA, so compute it once per model.
hourly_crps = {}
for model in selected_models:
    error_mae = res[model]["crps"].groupby("Date").mean()
    # Hour of the week must be weekday * 24 + hour. A product such as
    # (weekday + 1) * (hour + 1) maps different hours to the same key
    # (e.g. Monday 01:00 and Tuesday 00:00 both give 2) and mixes them.
    hour_of_week = error_mae.index.weekday * DAY_LEN + error_mae.index.hour
    hourly_crps[model] = error_mae.groupby(hour_of_week).mean().rename_axis('HourOfWeek').reset_index()

for i, dma in enumerate(dmas_characteristics.index):
    row = i // 2 + 1
    col = i % 2 + 1

    for j, model in enumerate(selected_models):
        df1 = hourly_crps[model]
        # legendgroup needs an f-string; the plain literal '{model}' would put
        # every trace in one group named "{model}".
        fig.add_trace(go.Scatter(x=df1['HourOfWeek'], y=df1[dma],
                                 name=f'{model}', mode='lines', line=dict(color=colors[j]),
                                 legendgroup=f'{model}', showlegend=True
                                ), row=row, col=col)

fig.show()

In [None]:
# IPython help lookup for crps_for_ensemble (the trailing `?` is notebook-only syntax, not plain Python).
crps_for_ensemble?