# Chapter 3 Analyzing and Visualizing Time Series Data

In [1]:
# Move the working directory one level up; the cells below use paths relative
# to it ("imgs/chapter_3", "data/london_smart_meters/preprocessed", `src.*`).
# The change is relative, so re-running this cell moves up one more level.
%cd ../

c:\Users\dioni\Documents\GitHub\Modern-Time-Series-Forecasting-with-Python


In [31]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import os
import plotly.io as pio
from itertools import cycle
pio.templates.default = "plotly_white"
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from IPython.display import display, HTML
# %load_ext autoreload
# %autoreload 2
# Seed NumPy's global RNG with a fixed value so that random operations in this
# notebook (e.g. `DataFrame.sample`) return the same result on every run.
# Calling `np.random.seed()` without an argument re-seeds from fresh entropy,
# which makes the outputs differ between runs.
np.random.seed(42)
# Register tqdm's progress-bar integration with pandas.
tqdm.pandas()

In [3]:
# Make sure the folder that receives the exported figures exists.
Path("imgs/chapter_3").mkdir(parents=True, exist_ok=True)
# Location of the preprocessed dataset; stop right away if it is missing.
preprocessed = Path("data") / "london_smart_meters" / "preprocessed"
assert preprocessed.is_dir(), "You have to run 02 - Preprocessing London Smart Meter Dataset.ipynb in Chapter02 before running this notebook"

In [5]:
from src.utils import plotting_utils

  from tqdm.autonotebook import tqdm


In [42]:
def format_plot(fig, legends = None, xlabel = 'Time', ylabel = 'Value', figsize=(500,650), font_size = 15, title_font_size=20):
    """Apply the notebook's common layout settings to a Plotly figure.

    Args:
        fig: Figure to format. It must provide ``for_each_trace`` and
            ``update_layout``; it is modified in place.
        legends: Optional iterable of trace names. Names are assigned to the
            traces in order and reused cyclically when there are more traces
            than names. A falsy value leaves the trace names untouched.
        xlabel: Title of the x axis.
        ylabel: Title of the y axis.
        figsize: ``(height, width)`` of the figure -- note that the height
            comes first.
        font_size: Font size used for legend entries, axis titles and tick
            labels.
        title_font_size: Font size of the figure title.

    Returns:
        The same ``fig`` object, so the call can be chained or reassigned.
    """
    if legends:
        # cycle() lets a short list of names cover any number of traces.
        names = cycle(legends)
        fig.for_each_trace(lambda trace: trace.update(name=next(names)))
    fig.update_layout(
        autosize=False,
        width=figsize[1],
        height=figsize[0],
        # Centre the title. Its size comes from `title_font_size` via
        # `title.font`, which replaces the deprecated `titlefont` attribute.
        title={
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {'size': title_font_size},
        },
        legend_title = None,
        # Horizontal legend anchored at its bottom-right corner near the top of the plot.
        legend = dict(
            font = dict(size = font_size),
            orientation = 'h',
            yanchor = 'bottom',
            y = 0.98,
            xanchor = 'right',
            x = 1
        ),
        yaxis=dict(
            title=dict(text=ylabel, font=dict(size=font_size)),
            tickfont=dict(size=font_size),
        ),
        xaxis=dict(
            title=dict(text=xlabel, font=dict(size=font_size)),
            tickfont=dict(size=font_size),
        ),
    )
    return fig

## Reading blocks 0-7

In [6]:
from src.utils.data_utils import compact_to_expanded

In [9]:
# Read the merged parquet file for blocks 0-7 and preview its first rows.
try:
    block_df = pd.read_parquet(preprocessed/"london_smart_meters_merged_block_0-7.parquet")
    display(block_df.head())
except FileNotFoundError:
    # Show an HTML warning instead of a traceback.
    # NOTE(review): `block_df` is never assigned on this path, so the cells
    # below that use it will fail until the preprocessing notebook has been run.
    display(HTML("""
    <div class="alert alert-block alert-warning">
    <b>Warning!</b> File not found. Please make sure you have run 02 - Preprocessing London Smart Meter Dataset.ipynb in Chapter02
    </div>
    """))

Unnamed: 0,LCLid,start_timestamp,frequency,energy_consumption,series_length,stdorToU,Acorn,Acorn_grouped,file,holidays,...,windBearing,temperature,dewPoint,pressure,apparentTemperature,windSpeed,precipType,icon,humidity,summary
0,MAC000002,2012-10-13,30min,"[0.263, 0.2689999999999999, 0.275, 0.256, 0.21...",24144,Std,ACORN-A,Affluent,block_0,"[NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDA...",...,"[186, 186, 188, 188, 190, 190, 203, 203, 206, ...","[8.78, 8.78, 8.27, 8.27, 7.87, 7.87, 7.89, 7.8...","[6.28, 6.28, 6.21, 6.21, 6.22, 6.22, 6.76, 6.7...","[1007.7, 1007.7, 1007.36, 1007.36, 1006.73, 10...","[7.55, 7.55, 7.34, 7.34, 6.75, 6.75, 6.89, 6.8...","[2.28, 2.28, 1.81, 1.81, 1.95, 1.95, 1.83, 1.8...","[rain, rain, rain, rain, rain, rain, rain, rai...","[clear-night, clear-night, clear-night, clear-...","[0.84, 0.84, 0.87, 0.87, 0.89, 0.89, 0.93, 0.9...","[Clear, Clear, Clear, Clear, Partly Cloudy, Pa..."
1,MAC000246,2012-01-01,30min,"[0.509, 0.317, 0.253, 0.249, 0.93, 0.607, 0.10...",37872,Std,ACORN-A,Affluent,block_0,"[NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDA...",...,"[229, 229, 238, 238, 229, 229, 231, 231, 227, ...","[12.12, 12.12, 12.59, 12.59, 12.45, 12.45, 12....","[10.97, 10.97, 11.02, 11.02, 11.04, 11.04, 10....","[1008.1, 1008.1, 1007.88, 1007.88, 1007.95, 10...","[12.12, 12.12, 12.59, 12.59, 12.45, 12.45, 12....","[5.9, 5.9, 6.06, 6.06, 5.31, 5.31, 4.68, 4.68,...","[rain, rain, rain, rain, rain, rain, rain, rai...","[partly-cloudy-night, partly-cloudy-night, clo...","[0.93, 0.93, 0.9, 0.9, 0.91, 0.91, 0.93, 0.93,...","[Mostly Cloudy, Mostly Cloudy, Overcast, Overc..."
2,MAC000450,2012-03-23,30min,"[1.337, 1.426, 0.996, 0.971, 0.994, 0.952, 0.8...",33936,Std,ACORN-A,Affluent,block_0,"[NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDA...",...,"[78, 78, 73, 73, 81, 81, 80, 80, 75, 75, 71, 7...","[8.76, 8.76, 8.54, 8.54, 8.09, 8.09, 7.34, 7.3...","[7.25, 7.25, 7.12, 7.12, 7.17, 7.17, 6.68, 6.6...","[1027.41, 1027.41, 1026.91, 1026.91, 1026.54, ...","[7.59, 7.59, 7.43, 7.43, 7.24, 7.24, 7.34, 7.3...","[2.18, 2.18, 2.07, 2.07, 1.72, 1.72, 1.34, 1.3...","[rain, rain, rain, rain, rain, rain, rain, rai...","[fog, fog, fog, fog, fog, fog, fog, fog, fog, ...","[0.9, 0.9, 0.91, 0.91, 0.94, 0.94, 0.96, 0.96,...","[Foggy, Foggy, Foggy, Foggy, Foggy, Foggy, Fog..."
3,MAC001074,2012-05-09,30min,"[0.18, 0.086, 0.106, 0.173, 0.146, 0.223, 0.21...",31680,ToU,ACORN-,ACORN-,block_0,"[NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDA...",...,"[215, 215, 207, 207, 215, 215, 216, 216, 126, ...","[11.46, 11.46, 11.38, 11.38, 11.38, 11.38, 10....","[10.23, 10.23, 10.17, 10.17, 10.24, 10.24, 10....","[1007.39, 1007.39, 1007.21, 1007.21, 1007.06, ...","[11.46, 11.46, 11.38, 11.38, 11.38, 11.38, 10....","[2.35, 2.35, 2.15, 2.15, 1.84, 1.84, 1.22, 1.2...","[rain, rain, rain, rain, rain, rain, rain, rai...","[partly-cloudy-night, partly-cloudy-night, par...","[0.92, 0.92, 0.92, 0.92, 0.93, 0.93, 0.95, 0.9...","[Partly Cloudy, Partly Cloudy, Mostly Cloudy, ..."
4,MAC003223,2012-09-18,30min,"[0.076, 0.079, 0.123, 0.109, 0.051, 0.069, 0.0...",25344,Std,ACORN-A,Affluent,block_0,"[NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDAY, NO_HOLIDA...",...,"[236, 236, 240, 240, 242, 242, 244, 244, 248, ...","[14.06, 14.06, 13.12, 13.12, 12.53, 12.53, 12....","[10.82, 10.82, 10.29, 10.29, 9.86, 9.86, 9.83,...","[1011.09, 1011.09, 1010.82, 1010.82, 1010.65, ...","[14.06, 14.06, 13.12, 13.12, 12.53, 12.53, 12....","[3.86, 3.86, 3.81, 3.81, 4.27, 4.27, 4.12, 4.1...","[rain, rain, rain, rain, rain, rain, rain, rai...","[clear-night, clear-night, clear-night, clear-...","[0.81, 0.81, 0.83, 0.83, 0.84, 0.84, 0.86, 0.8...","[Clear, Clear, Clear, Clear, Clear, Clear, Par..."


In [10]:
# Convert the rows that belong to block_7 from the compact layout (one row per
# meter) to the expanded layout (one row per timestamp).
exp_block_df = compact_to_expanded(
    block_df.loc[block_df["file"] == "block_7"],
    timeseries_col="energy_consumption",
    static_cols=[
        "frequency",
        "series_length",
        "stdorToU",
        "Acorn",
        "Acorn_grouped",
        "file",
    ],
    time_varying_cols=[
        "holidays",
        "visibility",
        "windBearing",
        "temperature",
        "dewPoint",
        "pressure",
        "apparentTemperature",
        "windSpeed",
        "precipType",
        "icon",
        "humidity",
        "summary",
    ],
    ts_identifier="LCLid",
)

exp_block_df.head()

  0%|          | 0/50 [00:00<?, ?it/s]

Unnamed: 0,timestamp,LCLid,energy_consumption,frequency,series_length,stdorToU,Acorn,Acorn_grouped,file,holidays,...,windBearing,temperature,dewPoint,pressure,apparentTemperature,windSpeed,precipType,icon,humidity,summary
0,2012-01-01 00:00:00,MAC000050,0.175,30min,37872,Std,ACORN-D,Affluent,block_7,NO_HOLIDAY,...,229,12.12,10.97,1008.1,12.12,5.9,rain,partly-cloudy-night,0.93,Mostly Cloudy
1,2012-01-01 00:30:00,MAC000050,0.212,30min,37872,Std,ACORN-D,Affluent,block_7,NO_HOLIDAY,...,229,12.12,10.97,1008.1,12.12,5.9,rain,partly-cloudy-night,0.93,Mostly Cloudy
2,2012-01-01 01:00:00,MAC000050,0.313,30min,37872,Std,ACORN-D,Affluent,block_7,NO_HOLIDAY,...,238,12.59,11.02,1007.88,12.59,6.06,rain,cloudy,0.9,Overcast
3,2012-01-01 01:30:00,MAC000050,0.302,30min,37872,Std,ACORN-D,Affluent,block_7,NO_HOLIDAY,...,238,12.59,11.02,1007.88,12.59,6.06,rain,cloudy,0.9,Overcast
4,2012-01-01 02:00:00,MAC000050,0.257,30min,37872,Std,ACORN-D,Affluent,block_7,NO_HOLIDAY,...,229,12.45,11.04,1007.95,12.45,5.31,rain,partly-cloudy-night,0.91,Mostly Cloudy


Taking a single time series from the block

In [11]:
# Keep only the readings of meter MAC000194 and index them by timestamp.
ts_df = exp_block_df.loc[exp_block_df["LCLid"] == "MAC000194"].set_index("timestamp")

In [13]:
# Derive calendar features from the timestamp index; columns are appended in
# the order listed here.
ts_df = ts_df.assign(
    weekday_name=ts_df.index.day_name(),
    weekday=ts_df.index.weekday,
    week=ts_df.index.isocalendar().week,
    day=ts_df.index.day,
    hour=ts_df.index.hour,
    date=ts_df.index.date,
    month=ts_df.index.month,
    month_name=ts_df.index.month_name(),
    year=ts_df.index.year,
)

In [14]:
# Display the single-meter frame; pandas shows a truncated view of long frames.
ts_df

Unnamed: 0_level_0,LCLid,energy_consumption,frequency,series_length,stdorToU,Acorn,Acorn_grouped,file,holidays,visibility,...,summary,weekday_name,weekday,week,day,hour,date,month,month_name,year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-01 00:00:00,MAC000194,0.064,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,12.99,...,Mostly Cloudy,Sunday,6,52,1,0,2012-01-01,1,January,2012
2012-01-01 00:30:00,MAC000194,0.077,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,12.99,...,Mostly Cloudy,Sunday,6,52,1,0,2012-01-01,1,January,2012
2012-01-01 01:00:00,MAC000194,0.124,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,12.89,...,Overcast,Sunday,6,52,1,1,2012-01-01,1,January,2012
2012-01-01 01:30:00,MAC000194,0.043,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,12.89,...,Overcast,Sunday,6,52,1,1,2012-01-01,1,January,2012
2012-01-01 02:00:00,MAC000194,0.024,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,11.54,...,Mostly Cloudy,Sunday,6,52,1,2,2012-01-01,1,January,2012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2014-02-27 21:30:00,MAC000194,0.396,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,16.09,...,Clear,Thursday,3,9,27,21,2014-02-27,2,February,2014
2014-02-27 22:00:00,MAC000194,0.550,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,14.00,...,Clear,Thursday,3,9,27,22,2014-02-27,2,February,2014
2014-02-27 22:30:00,MAC000194,0.331,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,14.00,...,Clear,Thursday,3,9,27,22,2014-02-27,2,February,2014
2014-02-27 23:00:00,MAC000194,0.177,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,14.03,...,Clear,Thursday,3,9,27,23,2014-02-27,2,February,2014


In [15]:
# Turn the month and weekday names into ordered categoricals so that plots
# sort them in calendar order rather than alphabetically.
ts_df["month_name"] = ts_df["month_name"].astype(
    pd.CategoricalDtype(
        categories=[
            "January", "February", "March", "April", "May", "June",
            "July", "August", "September", "October", "November", "December",
        ],
        ordered=True,
    )
)
ts_df["weekday_name"] = ts_df["weekday_name"].astype(
    pd.CategoricalDtype(
        categories=[
            "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
        ],
        ordered=True,
    )
)

In [16]:
# Fill the missing energy readings with the project's seasonal interpolation.
# The period is 7 * 48 samples, i.e. one week of 30-minute readings.
from src.imputation.interpolation import SeasonalInterpolation

ts_df["energy_consumption"] = SeasonalInterpolation(seasonal_period=7 * 48).fit_transform(
    ts_df["energy_consumption"].values.reshape(-1, 1)
)

In [18]:
# Spot-check four randomly drawn rows of the single-meter frame.
ts_df.sample(4)

Unnamed: 0_level_0,LCLid,energy_consumption,frequency,series_length,stdorToU,Acorn,Acorn_grouped,file,holidays,visibility,...,summary,weekday_name,weekday,week,day,hour,date,month,month_name,year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-10-20 21:30:00,MAC000194,0.235,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,6.53,...,Partly Cloudy,Saturday,5,42,20,21,2012-10-20,10,October,2012
2013-01-26 05:00:00,MAC000194,0.103,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,5.15,...,Mostly Cloudy,Saturday,5,4,26,5,2013-01-26,1,January,2013
2014-01-18 14:00:00,MAC000194,0.022,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,12.23,...,Clear,Saturday,5,3,18,14,2014-01-18,1,January,2014
2012-04-29 15:30:00,MAC000194,0.089,30min,37872,ToU,ACORN-D,Affluent,block_7,NO_HOLIDAY,12.86,...,Partly Cloudy,Sunday,6,17,29,15,2012-04-29,4,April,2012


----

# Line Charts

## Basic Line Plot

In [43]:
# Draw the raw series, apply the shared formatting, save a PNG copy and render it.
fig = format_plot(
    px.line(ts_df, y="energy_consumption", title="Energy Consumption for MAC000194"),
    ylabel="Energy Consumption",
)
fig.write_image("imgs/chapter_3/line_plot.png")
fig.show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



## Rolling Average Plot 

In [53]:
# Rolling mean over 30 * 48 = 1440 rows (30 days of 30-minute readings).
# The first 1439 rows are NaN because the window is not yet full there.
ts_df["rolling_monthly_avg"] = ts_df["energy_consumption"].rolling(window=30 * 48).mean()

In [54]:
# Plot the rolling monthly average. The title names MAC000194, the meter that
# `ts_df` was filtered to (it previously said MAC000193 by mistake).
fig = px.line(ts_df, y="rolling_monthly_avg", title="Rolling Monthly Average Energy Consumption for MAC000194")
fig = format_plot(fig, ylabel="Energy Consumption")
fig.write_image("imgs/chapter_3/rolling_avg_line_plot.png")
fig.show()


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [55]:
import pandas as pd

# Toy example: five consecutive days (2023-07-01 .. 2023-07-05) with the
# values 100, 200, ..., 500, used to illustrate the rolling mean.
df = pd.DataFrame(
    {
        "timestamp": pd.to_datetime([f"2023-07-{day:02d}" for day in range(1, 6)]),
        "value": [100 * step for step in range(1, 6)],
    }
)

print(df)

   timestamp  value
0 2023-07-01    100
1 2023-07-02    200
2 2023-07-03    300
3 2023-07-04    400
4 2023-07-05    500


In [58]:
# Mean over a sliding window of 3 rows. The first two entries are NaN because
# fewer than 3 observations are available at those positions.
df_rolling = df["value"].rolling(window=3).mean()

print(df_rolling)

0      NaN
1      NaN
2    200.0
3    300.0
4    400.0
Name: value, dtype: float64
