# Set-up

In [29]:
import pandas as pd
from utils import weekday_mapping, month_mapping
import holidays

# Plotting imports
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.subplots as sp
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

In [40]:
# Seaborn 'deep' palette
deep_palette = sns.color_palette(palette='deep')

# Evaluate Matplotlib's 'cool' colormap at the integer positions 0..255
cool_matplotlib = plt.cm.cool(range(256))
# Keep one position every 256 // 11 = 23, which leaves 12 colours
subset_cool_matplotlib = cool_matplotlib[::256 // 11]
# Wrap the sampled colours in a Seaborn palette so they can be indexed by position
cool_palette = sns.color_palette(palette=subset_cool_matplotlib)

In [30]:
def get_string_color(color, opacity=1):
    """Convert a colour with channels in [0, 1] into an ``rgba(...)`` string.

    Parameters
    ----------
    color : iterable of float
        Red, green and blue channels expressed as fractions of 1. Any
        component after the third (e.g. the alpha channel of an RGBA
        colour) is ignored: transparency is driven by ``opacity`` only.
    opacity : float, default 1
        Alpha value, written unchanged into the returned string.

    Returns
    -------
    str
        ``'rgba(r, g, b, opacity)'`` with ``r``, ``g`` and ``b`` as integers
        on the 0-255 scale.

    Raises
    ------
    ValueError
        If ``color`` has fewer than three components.
    """
    # Only the first three components are used, so RGBA input no longer
    # fails on unpacking.
    channels = tuple(color)[:3]
    if len(channels) != 3:
        raise ValueError(
            f'color must have at least 3 components, got {len(channels)}'
        )
    # int() truncates, so 0.5 maps to 127 (kept for backward compatibility)
    r, g, b = (int(value * 255) for value in channels)
    return f'rgba({r}, {g}, {b}, {opacity})'

# Read curves

In [6]:
# Read the raw curves from the Excel workbook
input_df = pd.read_excel('data/Dati UPRE.xlsx')
# Print the number of rows read, then display the frame
print(input_df.shape[0])
input_df

78840


Unnamed: 0,UPRE (Cliente),Anno,Mese,Data,Ora,kWh (prelievo dalla rete),Settore
0,UPRE_H,2023,1,2023-01-01,1,3009.50,Mezzi di trasporto
1,UPRE_H,2023,1,2023-01-01,2,3009.50,Mezzi di trasporto
2,UPRE_H,2023,1,2023-01-01,3,3009.50,Mezzi di trasporto
3,UPRE_H,2023,1,2023-01-01,4,2996.50,Mezzi di trasporto
4,UPRE_H,2023,1,2023-01-01,5,3035.50,Mezzi di trasporto
...,...,...,...,...,...,...,...
78835,UPRE_G,2023,12,2023-12-31,20,28402.00,Metalli non ferrosi
78836,UPRE_G,2023,12,2023-12-31,21,28352.50,Metalli non ferrosi
78837,UPRE_G,2023,12,2023-12-31,22,28297.50,Metalli non ferrosi
78838,UPRE_G,2023,12,2023-12-31,23,28578.00,Metalli non ferrosi


In [19]:
# Long -> wide: one row per (Anno, Mese, Data, Ora), one column per sector
df = (
    input_df
    .pivot(
        index=['Anno', 'Mese', 'Data', 'Ora'],
        columns='Settore',
        values='kWh (prelievo dalla rete)',
    )
    .reset_index()
)
# Drop the 'Settore' label that the pivot leaves on the column axis
df.columns.name = None

# Italian holidays for every year present in the data
holidays_it = holidays.IT(years=df.Anno.unique())

# Day-of-week label, translated through the mapping imported from utils
df['weekday'] = df.Data.dt.weekday.map(weekday_mapping)

# Masks used to build the day type
is_saturday = df.weekday == 'Saturday'
is_sunday = df.weekday == 'Sunday'
is_public_holiday = df.Data.apply(lambda day: day in holidays_it)

# Saturdays are flagged first; Sundays and public holidays are applied last,
# so a public holiday falling on a Saturday ends up as 'Holiday'
df['daytype'] = 'Working day'
df.loc[is_saturday, 'daytype'] = 'Saturday'
df.loc[is_sunday | is_public_holiday, 'daytype'] = 'Holiday'
df.head()

Unnamed: 0,Anno,Mese,Data,Ora,Alimentare,Cartaria,"Cemento, calce e gesso",Ceramiche e vetrarie,Chimica,Meccanica,Metalli non ferrosi,Mezzi di trasporto,Siderurgia,weekday,daytype
0,2023,1,2023-01-01,1,156.0,805.2,3874.0,11688.6,63720.0,30214.35,46640.0,3009.5,15780.948682,Sunday,Holiday
1,2023,1,2023-01-01,2,149.5,789.36,3796.0,11655.6,63840.0,30400.425,45969.0,3009.5,15426.590908,Sunday,Holiday
2,2023,1,2023-01-01,3,146.25,789.36,3419.0,11649.0,63750.0,30285.15,46406.25,3009.5,15842.233257,Sunday,Holiday
3,2023,1,2023-01-01,4,149.5,786.72,8125.0,11668.8,63570.0,29952.525,46739.0,2996.5,15771.318576,Sunday,Holiday
4,2023,1,2023-01-01,5,146.25,781.44,8476.0,11662.2,64140.0,30445.35,46774.75,3035.5,15784.031929,Sunday,Holiday


In [28]:
sectors = list(input_df.Settore.unique())
# 3x3 grid of subplots, one panel per sector
fig = sp.make_subplots(rows=3, cols=3, shared_xaxes=True, subplot_titles=sectors)

# Rows to plot. To use every day instead of working days only, swap in:
# select_condition = pd.Series(len(df) * [True], index=df.index)
select_condition = df.daytype == 'Working day'
selected_df = df[select_condition]

# Mean hourly profile of every sector over the selected rows.
# It does not depend on the sector being drawn, so it is computed once.
mean_df = selected_df.groupby('Ora', as_index=False)[sectors].mean()

for i, col in enumerate(sectors):
    grid_row, grid_col = 1 + i // 3, 1 + i % 3

    # Subsample whole days: de-duplicate the dates BEFORE sampling.
    # Sampling the hourly rows first and de-duplicating afterwards keeps a day
    # as soon as any one of its rows is drawn, which retains most days.
    days_to_plot = selected_df.Data.drop_duplicates().sample(frac=.1)

    # One faint blue line per sampled day
    for day in days_to_plot:
        day_df = selected_df[selected_df.Data == day]
        fig.add_trace(
            go.Scatter(
                y=day_df[col],
                x=day_df['Ora'],
                mode="lines",
                line=dict(color='rgba(0, 0, 255, 0.1)'),
                showlegend=False,
            ),
            row=grid_row,
            col=grid_col,
        )

    # Solid red line for the mean profile of this sector
    fig.add_trace(
        go.Scatter(
            y=mean_df[col],
            x=mean_df['Ora'],
            mode="lines",
            line=dict(color='rgba(255, 0, 0, 1)'),
            showlegend=False,
        ),
        row=grid_row,
        col=grid_col,
    )

fig.update_layout(height=800, width=1000)
fig.update_xaxes(range=[0, 24])
fig.show()

In [38]:
# 3x3 grid of subplots, one panel per sector
fig = sp.make_subplots(rows=3, cols=3, shared_xaxes=True, subplot_titles=sectors)

# Mask of the rows to consider: everything is kept here. Replace it with a
# narrower condition (for instance df.Mese == 8) to restrict the figure.
select_condition = pd.Series(len(df) * [True], index=df.index)

# One panel per sector
for sector_idx, sector in enumerate(sectors):
    grid_row = 1 + sector_idx // 3
    grid_col = 1 + sector_idx % 3

    # One colour per day type, in order of first appearance in df.daytype
    for colour_idx, daytype in enumerate(df.daytype.unique()):
        daytype_df = df[select_condition & (df.daytype == daytype)]
        base_colour = deep_palette[colour_idx]

        # Draw only a 10% random subset of the days to keep the panel readable
        sampled_days = daytype_df.Data.drop_duplicates().sample(frac=0.1)
        for day in sampled_days:
            day_df = df[df.Data == day]
            # Faint line for a single day
            day_trace = go.Scatter(
                x=day_df['Ora'],
                y=day_df[sector],
                mode="lines",
                line=dict(color=get_string_color(base_colour, opacity=0.1)),
                showlegend=False,
            )
            fig.add_trace(day_trace, row=grid_row, col=grid_col)

        # Opaque line for the mean hourly profile of this day type
        mean_df = daytype_df.groupby('Ora', as_index=False)[sector].mean()
        mean_trace = go.Scatter(
            x=mean_df['Ora'],
            y=mean_df[sector],
            mode="lines",
            line=dict(color=get_string_color(base_colour)),
            name=daytype,
            # Legend entries are emitted by the first panel only
            showlegend=(sector_idx == 0),
        )
        fig.add_trace(mean_trace, row=grid_row, col=grid_col)


# Figure size and common x-axis range
fig.update_layout(height=1000, width=1500)
fig.update_xaxes(range=[0, 24])
fig.show()

In [49]:
# 3x3 grid of subplots, one panel per sector
fig = sp.make_subplots(rows=3, cols=3, shared_xaxes=True, subplot_titles=sectors)

# Share of the days of each month that is drawn as individual faint lines
frac = 0.3
# Only working days are considered
select_condition = df.daytype == 'Working day'

# For each sector
for i, col in enumerate(sectors):
    grid_row, grid_col = 1 + i // 3, 1 + i % 3

    # For each month, coloured by its position in df.Mese.unique()
    for j, month in enumerate(df.Mese.unique()):
        month_df = df[select_condition & (df.Mese == month)]

        # We set the list of days to plot, subsampling for visibility
        days_to_plot = month_df.Data.drop_duplicates().sample(frac=frac)

        # For each day to plot
        for day in days_to_plot:
            day_df = df[df.Data == day]
            # Plotting the line for each single day
            fig.add_trace(
                go.Scatter(
                    y=day_df[col],
                    x=day_df['Ora'],
                    mode="lines",
                    line=dict(color=get_string_color(cool_palette[j], opacity=0.1)),
                    showlegend=False,
                ),
                row=grid_row,
                col=grid_col,
            )

        # Getting the mean curve of the month
        mean_df = month_df.groupby('Ora', as_index=False)[col].mean()
        # Plotting the mean
        fig.add_trace(
            go.Scatter(
                y=mean_df[col],
                x=mean_df['Ora'],
                mode="lines",
                line=dict(color=get_string_color(cool_palette[j])),
                name=month_mapping[month],
                showlegend=(i == 0),  # We plot the legend for the first one only
            ),
            row=grid_row,
            col=grid_col,
        )


# Figure size, and the same x-axis range as the other profile figures
fig.update_layout(height=1000, width=1500)
fig.update_xaxes(range=[0, 24])

# Show the plot
fig.show()

<HR>

# TESTS

In [39]:
# Display the pivoted frame for a quick visual check
df

Unnamed: 0,Anno,Mese,Data,Ora,Alimentare,Cartaria,"Cemento, calce e gesso",Ceramiche e vetrarie,Chimica,Meccanica,Metalli non ferrosi,Mezzi di trasporto,Siderurgia,weekday,daytype
0,2023,1,2023-01-01,1,156.00,805.20,3874.0,11688.6,63720.0,30214.350,46640.00,3009.5,15780.948682,Sunday,Holiday
1,2023,1,2023-01-01,2,149.50,789.36,3796.0,11655.6,63840.0,30400.425,45969.00,3009.5,15426.590908,Sunday,Holiday
2,2023,1,2023-01-01,3,146.25,789.36,3419.0,11649.0,63750.0,30285.150,46406.25,3009.5,15842.233257,Sunday,Holiday
3,2023,1,2023-01-01,4,149.50,786.72,8125.0,11668.8,63570.0,29952.525,46739.00,2996.5,15771.318576,Sunday,Holiday
4,2023,1,2023-01-01,5,146.25,781.44,8476.0,11662.2,64140.0,30445.350,46774.75,3035.5,15784.031929,Sunday,Holiday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2023,12,2023-12-31,20,130.00,921.36,14378.0,10711.8,68100.0,32280.900,28402.00,3055.0,15671.828440,Sunday,Holiday
8756,2023,12,2023-12-31,21,130.00,910.80,14560.0,10665.6,67992.0,32082.450,28352.50,3120.0,15661.039092,Sunday,Holiday
8757,2023,12,2023-12-31,22,133.25,897.60,14001.0,10527.0,68244.0,31837.125,28297.50,3159.0,15620.956881,Sunday,Holiday
8758,2023,12,2023-12-31,23,136.50,897.60,8840.0,10619.4,68208.0,31659.750,28578.00,3165.5,15663.472939,Sunday,Holiday
