# Step 0: visual inspection of the input data to discover problems and features in the data

In [1]:
# Import the necessary external libraries 
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

import sys
sys.path.append('..')
from eval.evaluator import load_data
from preprocessing.advanced_transforms import load_characteristics
# Load the two input tables. dmas_h_q is indexed by timestamp with one column
# per DMA (it is used that way in the cells below); wea_h is presumably the
# matching weather table - TODO confirm against eval.evaluator.load_data
dmas_h_q, wea_h = load_data()
# Per-DMA metadata; the cells below read its 'population' column
dmas_characteristics = load_characteristics()
# Window lengths in number of samples (assumes one sample per hour - TODO confirm)
DAY_LEN = 24
WEEK_LEN = 7 * DAY_LEN
WGROUP_LEN = 4 * WEEK_LEN # Grouping regularly in groups of 4 weeks gives 13 "months" per 52-week year
YEAR_LEN = 52 * WEEK_LEN

  from .autonotebook import tqdm as notebook_tqdm
  return Index(sequences[0], name=names)
  return Index(sequences[0], name=names)


In [2]:
# Figure geometry, colour palette and default styling shared by all plots

# Target image size: 12 cm wide x 6.5 cm high, i.e. 4.72 in x 2.56 in,
# stored as (width, height) in inches
dpi_mac = 227
dpi = dpi_mac*0.75
fig_size_in = (4.72, 2.56)

# Colour palette; `colors` is indexed by trace position in the plotting cells
img__color_blue = "#22409C"
img__color_green = "#009344"
colors = [img__color_blue, img__color_green, "#FFA500"]

# Defaults for the figure appearance. Each plotting cell overwrites the ones
# it needs before calling fix_layout.

# -- text
img__font_family, img__font_color, img__font_size = "Lato", "black", 14
img__title, img__title_font_size = "Title", 22
img__legend_font_size = 16

# -- axes
img__xaxis_title, img__yaxis_title = "X-Axis", "Y-Axis"
img__xaxis_range = [0, 1]
img__yaxis_range = [0, 1]
img__axis_width = 1

# -- lines
img__line_color = "grey"
img__zline_color = "black"
img__glines_color = "lightgrey"
img__line_width = 5

def fix_layout(a_fig: go.Figure) -> None:
    """Apply the notebook-wide figure styling to ``a_fig`` in place.

    Every setting is read from the module-level ``img__*`` variables,
    ``fig_size_in`` and ``dpi`` at call time, so a cell customises its figure
    by reassigning those variables before calling this function.

    Args:
        a_fig: Figure whose layout is updated.
    """

    def _axis(label, limits):
        # Both axes share every setting except their title and range.
        return dict(
            title=label,
            range=limits,
            automargin=True,
            showline=True,
            showgrid=True,
            linewidth=img__axis_width,
            linecolor=img__line_color,
            zerolinecolor=img__zline_color,
            gridcolor=img__glines_color,
        )

    # Centred title pinned near the top edge of the figure.
    title_block = dict(
        text=img__title,
        xanchor='center',
        x=0.5,
        yanchor='top',
        y=0.98,
        font=dict(
            family=img__font_family,
            size=img__title_font_size,
            color=img__font_color,
        ),
    )

    # Boxed vertical legend; x/y are the default anchor position and some
    # cells move it afterwards with fig.update_legends.
    legend_block = dict(
        orientation="v",
        xanchor="left",
        x=0.03,
        yanchor="top",
        y=0.9,
        itemsizing='trace',  # To ensure items in legend keep the same size
        traceorder="normal",
        bgcolor="White",  # Background color
        bordercolor="Black",  # Border color
        borderwidth=1,  # Border width
        groupclick="toggleitem",
        itemclick="toggleothers",
        itemdoubleclick="toggle",
        tracegroupgap=100,
        font_size=img__legend_font_size,
    )

    a_fig.update_layout(
        title=title_block,
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=_axis(img__xaxis_title, img__xaxis_range),
        yaxis=_axis(img__yaxis_title, img__yaxis_range),
        # Size in pixels: inches times dots per inch.
        width=fig_size_in[0]*dpi,
        height=fig_size_in[1]*dpi,
        font=dict(
            family=img__font_family,
            color=img__font_color,
            size=img__font_size,
        ),
        margin=dict(l=10, r=10, b=10, t=50, pad=0),
        showlegend=True,
        legend=legend_block,
    )


## Real data
We are using real, unprocessed data from a SCADA system, so failures and leaks of the cyber-physical system can appear in the data.

There is a burst on the 4th of July 2021 and there are many gaps in DMA H, for example between the 13th and the 17th of August 2021.
Therefore, let's show two days before and two days after these events.

In [30]:
# Burst example: DMA H flow from 2 to 6 July 2021, with the overall mean
# flow of the DMA drawn as a dotted reference line
dma = "DMA_H"
start_time = '2021-07-02 00:00:00'
end_time = '2021-07-06 00:00:00'
avg_consumption = dmas_h_q[dma].mean()

# Styling globals read by fix_layout
img__title = "Example of a probable pipe failure (burst) in DMA H"
img__xaxis_title = "Time of the day (h)"
img__yaxis_title = "Flow (L/s)"
img__xaxis_range = [start_time, end_time]
img__yaxis_range = [0, 100]
img__legend_labels = ["DMA H", "DMA H - Avg. consumption"]

fig = go.Figure(data=[
    # Full series; the visible window is set through img__xaxis_range
    go.Scatter(
        x=dmas_h_q.index,
        y=dmas_h_q[dma],
        mode='lines',
        line={"color": img__color_blue, "width": img__line_width},
        name=img__legend_labels[0],
    ),
    # Horizontal reference line spanning the visible window
    go.Scatter(
        x=img__xaxis_range,
        y=[avg_consumption] * 2,
        mode='lines',
        line={"color": "gray", "width": 1.5, "dash": 'dot'},
        name=img__legend_labels[1],
    ),
])
fix_layout(fig)
fig.show()

In [12]:
# Plot the gap on DMA H
# Every variable read below and by fix_layout is set in this cell, so the
# figure does not depend on which other cells were executed before it
# (several later cells reuse and overwrite `dma`, `avg_consumption`,
# `img__legend_labels` and the axis settings).
img__title = "Example of an instrumentation downtime in DMA H"
img__xaxis_title = "Time of the day (h)"
img__yaxis_title = "Flow (L/s)"

dma = "DMA_H"
start_time = '2021-08-13 00:00:00'
end_time = '2021-08-17 00:00:00'
img__xaxis_range = [start_time, end_time]
img__yaxis_range = [0, 100]
# Mean flow of the DMA over the whole loaded series, used as reference line
avg_consumption = dmas_h_q[dma].mean()

img__legend_labels = ["DMA H", "DMA H - Avg. consumption"]

fig = go.Figure()
# Full series; the visible window is set through img__xaxis_range
fig.add_trace(go.Scatter(
        x=dmas_h_q.index,
        y=dmas_h_q[dma],
        mode='lines',
        line=dict(color=img__color_green, width=img__line_width),
        name=img__legend_labels[0]
    )
)
# Dotted horizontal reference line spanning the visible window
fig.add_trace(go.Scatter(
        x=img__xaxis_range,
        y=[avg_consumption, avg_consumption],
        mode='lines',
        line=dict(color="grey", width=1.5, dash='dot'),
        name=img__legend_labels[1]
    )
)
fix_layout(fig)
fig.show()

## Weekday
The day of the week strongly influences the pattern. Let's look at two DMAs (E - residential and J - port) and the difference between a workday and a holiday, e.g. Tuesday 18th May 2021 and Sunday 23rd May 2021.


In [41]:
# Plot the average daily flow profile of DMA E on workdays (Mon-Fri) and on
# weekend days (Sat-Sun, labelled "Holiday" in the legend)
img__title = "Typical daily consumption pattern during workdays and holidays"
img__xaxis_title = "Time of the day (h)"
# The x-axis holds times of day: clear any date range left in this global by
# another cell so that Plotly autoranges the axis
img__xaxis_range = None
# Set explicitly because other cells switch this global to a per-capita label
img__yaxis_title = "Flow (L/s)"
img__yaxis_range = [0, 120]

dmas = ['DMA_E']
# index.weekday: Monday=0 ... Sunday=6, so <5 selects Monday to Friday
avg_flow_workdays = dmas_h_q[dmas_h_q.index.weekday<5][dmas]
avg_flow_workdays = avg_flow_workdays.groupby(avg_flow_workdays.index.time).mean()

avg_flow_non_workdays = dmas_h_q[dmas_h_q.index.weekday>=5][dmas]
avg_flow_non_workdays = avg_flow_non_workdays.groupby(avg_flow_non_workdays.index.time).mean()

# Mean flow over the whole loaded series, drawn as a horizontal reference
avg_consumption = dmas_h_q[dmas[0]].mean()

img__legend_labels = ["DMA E - Workday", "DMA E - Holiday", "DMA E - Avg. consumption"]

fig = go.Figure()
# NOTE: the legend labels are fixed per profile, so these loops assume that
# `dmas` holds a single DMA
for d, dma in enumerate(dmas):
    fig.add_trace(go.Scatter(
        x=avg_flow_workdays.index,
        y=avg_flow_workdays[dma],
        mode='lines',
        line=dict(color=colors[d], width=img__line_width),
        name=img__legend_labels[0]
    ))
for d, dma in enumerate(dmas):
    fig.add_trace(go.Scatter(
        x=avg_flow_non_workdays.index,
        y=avg_flow_non_workdays[dma],
        mode='lines',
        line=dict(color=colors[d+1], width=img__line_width),
        name=img__legend_labels[1]
    ))

fig.add_hline(y=avg_consumption, line=dict(color="gray", width=1.5, dash='dot'), annotation_text=img__legend_labels[2], annotation_position="top left")

fix_layout(fig)
# Show a tick every four hours plus the last sample of the day
fig.update_xaxes(tickvals=['00:00:00', '04:00:00', '08:00:00', '12:00:00', '16:00:00', '20:00:00','23:00:00'], 
                 ticktext=['00:00', '04:00', '08:00', '12:00', '16:00', '20:00','23:00'])
# Move the legend away from the default position set by fix_layout
fig.update_legends(x=0.65, y=0.3)
fig.show() 

## Location 

City centre vs suburbs

In [6]:
# Plot the average hourly flow per capita on DMA C (countryside) and
# DMA H (city centre)

img__title = "Typical daily consumption pattern in the countryside and the city centre"
# Set explicitly so the cell does not rely on the title left by another cell
img__xaxis_title = "Time of the day (h)"
# The x-axis holds hours of the day: clear any date range left in this global
# by another cell so that Plotly autoranges the axis
img__xaxis_range = None
img__yaxis_title = "Flow per capita (L/s)"
img__yaxis_range = [0, 0.015]

dmas = ['DMA_C', 'DMA_H']
img__legend_labels = ["DMA C - Countryside", "DMA H - City centre"]
# Mean flow per hour of the day, divided by each DMA's population: the
# division aligns the population labels with the DataFrame columns
avg_flow = dmas_h_q[dmas].groupby(dmas_h_q.index.hour).mean()/dmas_characteristics.loc[dmas, 'population']

fig = go.Figure()
for d, dma in enumerate(dmas):
    fig.add_trace(go.Scatter(
        x=avg_flow.index,
        y=avg_flow[dma],
        mode='lines',
        line=dict(color=colors[d], width=img__line_width),
        name=img__legend_labels[d]
    ))

fix_layout(fig)
# Show a tick every four hours plus the last hour of the day
fig.update_xaxes(tickvals=[0, 4, 8, 12, 16, 20, 23], 
                 ticktext=['00:00', '04:00', '08:00', '12:00', '16:00', '20:00','23:00'])
fig.show()


## Activity
Flow in special DMAs: DMA A (hospital), DMA H (city centre), DMA F (sport facilities), DMA J (port)

In [28]:
# Plot the average hourly flow per capita on DMA A (hospital),
# DMA B (countryside) and DMA J (port)

img__title = "Typical daily consumption pattern for different DMAs"
# Set explicitly so the cell does not rely on the title left by another cell
img__xaxis_title = "Time of the day (h)"
# The x-axis holds hours of the day: clear any date range left in this global
# by another cell so that Plotly autoranges the axis
img__xaxis_range = None
img__yaxis_title = "Flow per capita (L/s)"
img__yaxis_range = [0, 0.08]

dmas = ['DMA_A', 'DMA_B', 'DMA_J']
img__legend_labels = ["DMA A - Hospital", "DMA B - Countryside", "DMA J - Port"]
# Mean flow per hour of the day, divided by each DMA's population: the
# division aligns the population labels with the DataFrame columns
avg_flow = dmas_h_q[dmas].groupby(dmas_h_q.index.hour).mean()/dmas_characteristics.loc[dmas, 'population']

fig = go.Figure()
for d, dma in enumerate(dmas):
    fig.add_trace(go.Scatter(
        x=avg_flow.index,
        y=avg_flow[dma],
        mode='lines',
        line=dict(color=colors[d], width=img__line_width),
        name=img__legend_labels[d]
    ))

fix_layout(fig)
# Show a tick every four hours plus the last hour of the day
fig.update_xaxes(tickvals=[0, 4, 8, 12, 16, 20, 23], 
                 ticktext=['00:00', '04:00', '08:00', '12:00', '16:00', '20:00','23:00'])
# Move the legend to the top-left corner of the plot area
fig.update_legends(x=0.02, y=0.98)
fig.show()


## Season 

Normalized water consumption on DMA B winter vs summer day

In [43]:
# Plot the average daily flow profile of DMA B in winter and in summer
img__title = "Typical daily consumption pattern between seasons"
img__xaxis_title = "Time of the day (h)"
# The x-axis holds times of day: clear any date range left in this global by
# another cell so that Plotly autoranges the axis
img__xaxis_range = None
img__yaxis_title = "Flow (L/s)"
img__yaxis_range = [0, 15]

dmas = ['DMA_B']
# "Summer" here is April to August (months 4-8)
avg_flow_summer = dmas_h_q[(dmas_h_q.index.month>=4) & (dmas_h_q.index.month<9)][dmas]
avg_flow_summer = avg_flow_summer.groupby(avg_flow_summer.index.time).mean()

# "Winter" here is October to February (months 10-12 and 1-2);
# March and September belong to neither group
avg_flow_winter = dmas_h_q[(dmas_h_q.index.month>=10) | (dmas_h_q.index.month<3)][dmas]
avg_flow_winter = avg_flow_winter.groupby(avg_flow_winter.index.time).mean()

# Mean flow over the whole loaded series, drawn as a horizontal reference
avg_consumption = dmas_h_q[dmas[0]].mean()

img__legend_labels = ["DMA B - Winter", "DMA B - Summer", "DMA B - Avg. consumption"]

fig = go.Figure()
# NOTE: the legend labels are fixed per season, so these loops assume that
# `dmas` holds a single DMA
for d, dma in enumerate(dmas):
    fig.add_trace(go.Scatter(
        x=avg_flow_winter.index,
        y=avg_flow_winter[dma],
        mode='lines',
        line=dict(color=colors[d], width=img__line_width),
        name=img__legend_labels[0]
    ))
for d, dma in enumerate(dmas):
    fig.add_trace(go.Scatter(
        x=avg_flow_summer.index,
        y=avg_flow_summer[dma],
        mode='lines',
        line=dict(color=colors[d+2], width=img__line_width),
        name=img__legend_labels[1]
    ))

fig.add_hline(y=avg_consumption, line=dict(color=img__color_green, width=1.5, dash='dot'), annotation_text=img__legend_labels[2], annotation_position="top left")

fix_layout(fig)
# Show a tick every four hours plus the last sample of the day
fig.update_xaxes(tickvals=['00:00:00', '04:00:00', '08:00:00', '12:00:00', '16:00:00', '20:00:00','23:00:00'], 
                 ticktext=['00:00', '04:00', '08:00', '12:00', '16:00', '20:00','23:00'])
# Move the legend away from the default position set by fix_layout
fig.update_legends(x=0.65, y=0.3)
fig.show() 

## Trends

DMA B (Jan 21 - Jan 22): summer behaviour with a rolling mean; DMA F: trend leak (September 2021 - March 23)

In [35]:
# DMA C over 2021: daily mean flow smoothed with a 5-day rolling average
dmas = ['DMA_C']

# Styling globals read by fix_layout
img__title = "Daily flow across the year"
img__xaxis_title = "Time (days)"
img__yaxis_title = "Flow (L/s)"
img__xaxis_range = ['2021-01-01 00:00:00', '2021-12-31 00:00:00']
img__yaxis_range = [0, 10]
img__legend_labels = ["DMA C - Countryside"]

# One mean value per calendar day, then the rolling mean over 5 of those days
avg_flow = dmas_h_q[dmas].groupby(dmas_h_q.index.date).mean()
avg_flow = avg_flow.rolling(window=5).mean()

fig = go.Figure()
for d, dma in enumerate(dmas):
    fig.add_trace(
        go.Scatter(
            x=avg_flow.index,
            y=avg_flow[dma],
            mode='lines',
            line={"color": colors[d], "width": img__line_width},
            name=img__legend_labels[d],
        )
    )

# Reference levels are hard-coded values, not computed in this cell
# (presumably read off the data beforehand - TODO confirm)
fig.add_hline(
    y=4.3,
    line=dict(dash="dot", color=colors[2], width=1.5),
    annotation_text="2021 Winter avg. consumption",
    annotation_position="top right",
)
fig.add_hline(
    y=3.6,
    line=dict(dash="dot", color=colors[2], width=1.5),
    annotation_text="2022 Winter avg. consumption",
    annotation_position="bottom right",
)

fix_layout(fig)
fig.show()

In [39]:
# DMA F from September 2021 to April 2022: daily mean flow smoothed with a
# 5-day rolling average, plus a hand-drawn trend line
dmas = ['DMA_F']

# Styling globals read by fix_layout
img__title = "Daily flow across the year"
img__xaxis_title = "Time (days)"
img__yaxis_title = "Flow (L/s)"
img__xaxis_range = ['2021-09-01 00:00:00', '2022-04-01 00:00:00']
img__yaxis_range = [0, 15]
img__legend_labels = ["DMA F - Sport facilities"]

# One mean value per calendar day, then the rolling mean over 5 of those days
avg_flow = dmas_h_q[dmas].groupby(dmas_h_q.index.date).mean()
avg_flow = avg_flow.rolling(window=5).mean()

fig = go.Figure()
for d, dma in enumerate(dmas):
    fig.add_trace(
        go.Scatter(
            x=avg_flow.index,
            y=avg_flow[dma],
            mode='lines',
            line={"color": colors[d+1], "width": img__line_width},
            name=img__legend_labels[d],
        )
    )

# Straight dotted line between two fixed flow values across the visible
# window; the end points are hard-coded, not fitted in this cell
fig.add_trace(
    go.Scatter(
        x=img__xaxis_range,
        y=[6, 13],
        mode='lines',
        line={"color": colors[2], "width": 1.5, "dash": 'dot'},
        showlegend=False,
    )
)
fig.add_annotation(x='2022-03-01 00:00:00', y=12, text="Trend")

fix_layout(fig)
fig.show()