<a href="https://colab.research.google.com/github/fabriziobasso/Colab_backup/blob/main/Chapter_IV_Baseline_Forecasts_using_NIXTLA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install statsforecast==1.7.8
!pip install datasetsforecast==0.0.8

In [2]:
# from google.colab import auth
# auth.authenticate_user()

In [3]:
%%capture
# Clone the book's companion repository; the `src` helper package imported in the
# following cells lives inside it.
# NOTE: %%capture hides git's output, so a failed clone (e.g. the folder already
# exists on a re-run) will not be visible here.
!git clone https://github.com/PacktPublishing/Modern-Time-Series-Forecasting-with-Python-2E.git

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import os
import plotly.io as pio
pio.templates.default = "plotly_white"
import pandas as pd
from pathlib import Path
from tqdm.auto import tqdm
import missingno as msno
from itertools import cycle
from sklearn.metrics import mean_absolute_error
from IPython.display import display, HTML
# %load_ext autoreload
# %autoreload 2
# Seed NumPy's global RNG with a fixed value: a bare seed() pulls fresh entropy
# from the OS, which makes any random draw differ between runs.
np.random.seed(42)
tqdm.pandas()

# Navigate to the repository's root directory
%cd Modern-Time-Series-Forecasting-with-Python-2E

from src.utils.data_utils import compact_to_expanded
from src.imputation.interpolation import SeasonalInterpolation

/content/Modern-Time-Series-Forecasting-with-Python-2E


In [5]:
# Mount Google Drive (Colab only) so the /content/drive/MyDrive/... paths used in
# the following cells can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# %cd without an argument switches to the home directory (the recorded output is /root),
# leaving the repository folder entered in the imports cell.
%cd

/root


In [7]:
# Create the image output folder (including any missing parents) if it is not there yet,
# then point `preprocessed` at the folder holding the preprocessed London Smart Meters data.
Path("/content/drive/MyDrive/Books/Modern Time Series Forecasting/Chapter II/imgs/chapter_2").mkdir(parents=True, exist_ok=True)
preprocessed = Path(
    "/content/drive/MyDrive/Books/Modern Time Series Forecasting/Data/data/london_smart_meters/data/london_smart_meters/preprocessed"
)

In [8]:
# Fail fast if the preprocessed data folder is missing, instead of erroring later at load time.
assert preprocessed.is_dir(), "You have to run 02 - Preprocessing London Smart Meter Dataset.ipynb in Chapter02 before running this notebook"

In [9]:
def format_plot(fig, legends = None, font_size=15, title_font_size=20):
    """Apply the notebook's standard layout to a figure and return it.

    Note: a later cell defines another ``format_plot`` with extra label/title
    parameters; once that cell is executed it replaces this definition.

    Parameters
    ----------
    fig : object
        Figure to style. Only ``for_each_trace`` and ``update_layout`` are
        called on it (e.g. a Plotly figure).
    legends : sequence of str, optional
        Names assigned to the traces in order. The names are cycled, so a
        shorter list is reused when there are more traces than names.
    font_size : int
        Font size for legend entries, axis titles and tick labels.
    title_font_size : int
        Font size of the figure title.

    Returns
    -------
    object
        The same ``fig`` that was passed in.
    """
    if legends:
        names = cycle(legends)
        fig.for_each_trace(lambda t: t.update(name=next(names)))

    # Fonts are set through the nested ``title.font`` path. The legacy
    # ``titlefont`` shortcut is deprecated and is no longer accepted by
    # recent Plotly releases.
    fig.update_layout(
        autosize=False,
        width=900,
        height=500,
        title=dict(
            x=0.5,
            xanchor="center",
            yanchor="top",
            font=dict(size=title_font_size),
        ),
        legend_title=None,
        legend=dict(
            font=dict(size=font_size),
            orientation="h",
            yanchor="bottom",
            y=0.98,
            xanchor="right",
            x=1,
        ),
        yaxis=dict(
            title=dict(text="Value", font=dict(size=font_size)),
            tickfont=dict(size=font_size),
        ),
        xaxis=dict(
            title=dict(text="Day", font=dict(size=font_size)),
            tickfont=dict(size=font_size),
        ),
    )
    return fig

In [10]:
import numpy as np
import pandas as pd
import time
import os
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from itertools import cycle

pio.templates.default = "plotly_white"
import warnings
import humanize

from functools import partial
from statsforecast.core import StatsForecast
from utilsforecast.plotting import plot_series
from utilsforecast.evaluation import evaluate
from statsforecast.models import (
    Naive,
    SeasonalNaive,
    HistoricAverage,
    WindowAverage,
    SeasonalWindowAverage,
    RandomWalkWithDrift,
    HoltWinters,
    ETS,
    AutoETS,
    AutoARIMA,
    ARIMA,
    AutoTheta,
    DynamicTheta,
    DynamicOptimizedTheta,
    Theta,
    OptimizedTheta,
    TBATS,
    AutoTBATS,
    MSTL

)
from datasetsforecast.losses import *
from src.utils.ts_utils import forecast_bias

import time
from src.utils import plotting_utils

from tqdm import tqdm
# Fix NumPy's global seed so random draws are repeatable, and register tqdm's pandas integration.
np.random.seed(42)
tqdm.pandas()

In [11]:
# Show the installed statsforecast version; it should match the version pinned in the install cell.
import statsforecast as stf
stf.__version__

'1.7.8'

In [12]:
# Setting NIXTLA_ID_AS_COL makes the outputs of the predict methods carry the
# series id as a regular column instead of as the index.
os.environ['NIXTLA_ID_AS_COL'] = '1'

* **Set up Folders**

In [14]:
# Create this chapter's output folder (including any missing parents) if needed,
# then point `preprocessed` at the folder holding the preprocessed London Smart Meters data.
Path("/content/drive/MyDrive/Books/Modern Time Series Forecasting/Chapter IV/chapter_4").mkdir(parents=True, exist_ok=True)
preprocessed = Path(
    "/content/drive/MyDrive/Books/Modern Time Series Forecasting/Data/data/london_smart_meters/data/london_smart_meters/preprocessed"
)

* **Graph Functions and Formatting**

In [None]:
def format_plot(fig, legends = None, xlabel="Time", ylabel="Value", title="", font_size=15):
    """Apply the notebook's standard layout to a figure and return it.

    Parameters
    ----------
    fig : object
        Figure to style. Only ``for_each_trace`` and ``update_layout`` are
        called on it (e.g. a Plotly figure).
    legends : sequence of str, optional
        Names assigned to the traces in order. The names are cycled, so a
        shorter list is reused when there are more traces than names.
    xlabel, ylabel : str
        Axis titles.
    title : str
        Figure title text. The default empty string is passed through as-is,
        so it replaces whatever title text the figure already had.
    font_size : int
        Font size for legend entries, axis titles and tick labels
        (the figure title is fixed at size 20).

    Returns
    -------
    object
        The same ``fig`` that was passed in.
    """
    if legends:
        names = cycle(legends)
        fig.for_each_trace(lambda t: t.update(name=next(names)))

    # Fonts are set through the nested ``title.font`` path. The legacy
    # ``titlefont`` shortcut is deprecated and is no longer accepted by
    # recent Plotly releases.
    fig.update_layout(
        autosize=False,
        width=900,
        height=500,
        title=dict(
            text=title,
            x=0.5,
            xanchor="center",
            yanchor="top",
            font=dict(size=20),
        ),
        legend_title=None,
        legend=dict(
            font=dict(size=font_size),
            orientation="h",
            yanchor="bottom",
            y=0.98,
            xanchor="right",
            x=1,
        ),
        yaxis=dict(
            title=dict(text=ylabel, font=dict(size=font_size)),
            tickfont=dict(size=font_size),
        ),
        xaxis=dict(
            title=dict(text=xlabel, font=dict(size=font_size)),
            tickfont=dict(size=font_size),
        ),
    )
    return fig

def plot_forecast(pred_df, forecast_columns, timestamp_col, forecast_display_names=None, target_col="energy_consumption"):
    """Plot the actual series together with one or more forecast series.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Frame holding the timestamp column, the actual values and the forecasts.
    forecast_columns : list of str
        Columns of ``pred_df`` that contain forecasts. Rows where the FIRST of
        these columns is null are dropped from every trace.
    timestamp_col : str
        Column used for the x axis.
    forecast_display_names : list of str, optional
        Legend names for the forecasts, one per entry in ``forecast_columns``.
        Defaults to the column names themselves.
    target_col : str
        Column holding the actual values. Defaults to ``"energy_consumption"``,
        the column this function previously had hard-coded.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with the actuals as a faint solid line and one dashed/dotted
        line per forecast.

    Raises
    ------
    AssertionError
        If ``forecast_display_names`` is given with a different length than
        ``forecast_columns``.
    """
    if forecast_display_names is None:
        forecast_display_names = forecast_columns
    else:
        assert len(forecast_columns) == len(forecast_display_names)

    # Restrict every trace to the rows where the first forecast is available.
    mask = ~pred_df[forecast_columns[0]].isnull()
    timestamps = pred_df.loc[mask, timestamp_col]

    # Turn each "rgb(r,g,b)" palette entry into an "rgba(r,g,b, <alpha>)" template
    # so the opacity can be filled in per trace.
    palette = [c.replace("rgb", "rgba").replace(")", ", <alpha>)") for c in px.colors.qualitative.Dark2]
    act_color = palette[0]
    forecast_colors = cycle(palette[1:])
    dash_types = cycle(["dash", "dot", "dashdot"])

    fig = go.Figure()
    # Actuals are drawn faintly (alpha 0.3) so the forecasts stand out.
    fig.add_trace(go.Scatter(x=timestamps, y=pred_df.loc[mask, target_col],
                             mode='lines', line=dict(color=act_color.replace("<alpha>", "0.3")),
                             name='Actual Consumption'))

    for col, display_col in zip(forecast_columns, forecast_display_names):
        fig.add_trace(go.Scatter(x=timestamps, y=pred_df.loc[mask, col],
                                 mode='lines', line=dict(dash=next(dash_types), color=next(forecast_colors).replace("<alpha>", "1")),
                                 name=display_col))
    return fig

* **Load the Datasets**

In [21]:
# Read the missing-value-imputed train/validation/test splits, keeping only the columns used below.
split_columns = ["LCLid", "timestamp", "energy_consumption", "frequency"]
try:
    train_df = pd.read_parquet(preprocessed/"selected_blocks_train_missing_imputed.parquet")[split_columns]
    val_df = pd.read_parquet(preprocessed/"selected_blocks_val_missing_imputed.parquet")[split_columns]
    test_df = pd.read_parquet(preprocessed/"selected_blocks_test_missing_imputed.parquet")[split_columns]
except FileNotFoundError as err:
    # Stop here with an actionable message. Only printing a warning would leave the
    # split frames undefined and make the next cell fail with an unrelated NameError.
    raise FileNotFoundError(
        f"File not found in {preprocessed}. Ensure you've run '01-Setting up Experiment Harness.ipynb' in Chapter 04 and that the file path is correct."
    ) from err

In [22]:
# Row and column counts of the train, validation and test splits.
train_df.shape, val_df.shape, test_df.shape

((4293840, 4), (223200, 4), (194400, 4))

In [23]:
# Report the first and last timestamp of each split to confirm the splits do not overlap in time.
for split_name, split_df in (("train_df", train_df), ("val_df", val_df), ("test_df", test_df)):
    print(f"Min {split_name} Date: " , split_df["timestamp"].min())
    print(f"Max {split_name} Date: " , split_df["timestamp"].max())

Min train_df Date:  2012-01-01 00:00:00
Max train_df Date:  2013-12-31 23:30:00
Min val_df Date:  2014-01-01 00:00:00
Max val_df Date:  2014-01-31 23:30:00
Min test_df Date:  2014-02-01 00:00:00
Max test_df Date:  2014-02-27 23:30:00


In [24]:
# Keep only rows strictly after 2012-01-01 00:00, i.e. drop the midnight reading of the first day.
train_df = train_df.loc[train_df["timestamp"] > '2012-01-01']
print("Min train_df Date: " , train_df["timestamp"].min())

Min train_df Date:  2012-01-01 00:30:00


In [25]:
# Shape of the training split after the date filter above.
train_df.shape

(4293822, 4)

In [26]:
# Pick one household (MAC000193) out of every split to illustrate the baseline methods.
# The frequency string is taken from the first training row.
freq = train_df["frequency"].iloc[0]
ts_train, ts_val, ts_test = (
    split.loc[split["LCLid"] == "MAC000193", ['LCLid', "timestamp", "energy_consumption"]]
    for split in (train_df, val_df, test_df)
)

In [27]:
# Number of observations and columns in the selected household's training series.
ts_train.shape

(35087, 3)