In [None]:

!pip install sqlalchemy

# Forecast Exploration Notebook
This notebook was used for the initial setup of the GluonTS DeepAR workflow. <br />It covers loading the engineered time series, converting them into a GluonTS dataset (both the Pandas and List dataset variants are explored), training the model, and finally running backtests and evaluation.

## Config Setup

To keep track of experiments, their configurations and results are stored in a SQLite database.

In [None]:
from sqlalchemy import create_engine, Column, Integer, Float, String, ForeignKey, select
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, relationship

In [None]:
# Declarative base class that the ORM model below registers its table on
Base = declarative_base()


class Forecast_Experiment(Base):
    """ORM model for one forecasting run: its configuration and its evaluation metrics."""

    __tablename__ = 'forecast_experiment'

    # Primary key
    id = Column(Integer, primary_key=True)

    # Run configuration (column names default to the attribute names,
    # except `model`, whose database column is called 'model name')
    model = Column('model name', String(20))
    target = Column(String(100))
    past_rts_col = Column(String(1000))
    epochs = Column(Integer)
    prediction_length = Column(Integer)
    num_layers = Column(Integer)
    hidden_size = Column(Integer)

    # Aggregate evaluation metrics
    MASE = Column(Float)
    MAPE = Column(Float)
    RMSE = Column(Float)

    # Weighted quantile losses at the 0.1 / 0.5 / 0.9 quantiles and their mean
    wQL_10 = Column(Float)
    wQL_50 = Column(Float)
    wQL_90 = Column(Float)
    avg_wQL = Column(Float)

In [None]:
# Connect to the SQLite experiment database and create any table that does not exist yet
engine = create_engine("sqlite:///../config/forecast_config_v1.db")
Base.metadata.create_all(engine)

# Session factory bound to the engine, plus the session used by the rest of the notebook
Session = sessionmaker(engine)
session = Session()

In [None]:
# Query experiments: print every stored run with its average weighted quantile loss
q1 = select(Forecast_Experiment)
q1_result = session.execute(q1)
for experiment in q1_result.scalars():
    # "\n" (not "/n") is the newline escape, so the metric is printed on its own line
    print(f"{experiment.model}: \n avg_wQL = {experiment.avg_wQL}")

## DeepAR

In [None]:
# Install the GluonTS PyTorch implementation
!pip install "gluonts[torch]"

In [None]:
# Importing GluonTS methods 
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.common import ListDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator as TorchDeepAR
from gluonts.evaluation import Evaluator, backtest_metrics
from gluonts.evaluation import make_evaluation_predictions
from gluonts.model.evaluation import evaluate_model
from gluonts.evaluation.metrics import mape

# General Imports
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Load Stock-Indices TTS from a CSV file into a DataFrame
file_name = "indices.csv"
file_path = f"../../data_engineering/financial_ts/{file_name}"
tts_df = pd.read_csv(file_path)

# Turn the unnamed first CSV column into a datetime 'Week' index
tts_df = tts_df.rename(columns={'Unnamed: 0': 'Week'})
tts_df['Week'] = tts_df['Week'].apply(datetime.fromisoformat)
tts_df = tts_df.set_index('Week')

# Prefix every remaining column with "ts_" (the target is later addressed as 'ts_MSCI_World')
tts_col_map = {col: f"ts_{col}" for col in tts_df.columns}
tts_df = tts_df.rename(columns=tts_col_map)
tts_df.head()

In [None]:
# Read the financial RTS from CSV and index it by a datetime 'Week' column
file_name = "ww_gdp.csv"
file_path = f"../../data_engineering/financial_ts/{file_name}"
rts_f_df = (
    pd.read_csv(file_path)
    .rename(columns={'Unnamed: 0': 'Week'})
    .assign(Week=lambda frame: frame['Week'].apply(datetime.fromisoformat))
    .set_index('Week')
)

# Prefix all remaining columns with "ts_"; the mapping is reused later to list the feature columns
rts_f_col_map = {col: f"ts_{col}" for col in rts_f_df.columns}
rts_f_df = rts_f_df.rename(columns=rts_f_col_map)
rts_f_df.head()

In [None]:
# Read the event RTS from CSV
file_name = "ts_dgelt_events.csv"
file_path = f"../../data_engineering/timeseries_data/{file_name}"
rts_e_df = pd.read_csv(file_path)

# Columns with purely numeric labels get a "ts_" prefix; the unnamed first column becomes 'Week'
rts_e_col_map = {col: f"ts_{col}" for col in rts_e_df.columns if col.isnumeric()}
rts_e_df = rts_e_df.rename(columns={'Unnamed: 0': 'Week', **rts_e_col_map})

# Parse the ISO strings into datetimes and use them as the index
rts_e_df = (
    rts_e_df
    .assign(Week=rts_e_df['Week'].apply(datetime.fromisoformat))
    .set_index('Week')
)
# Drop timezone information from the index
# (presumably so it lines up with the other, timezone-naive frames when joining)
rts_e_df.index = rts_e_df.index.tz_localize(None)
rts_e_df.head()

In [None]:
# Join the event RTS and then the financial RTS onto the TTS via the shared 'Week' index,
# and turn 'Week' back into a regular column
ts_df = tts_df.join(rts_e_df).join(rts_f_df).reset_index()
ts_df.head()

In the following cells the pandas DataFrame is converted into a [Dataset](https://ts.gluon.ai/stable/api/gluonts/gluonts.dataset.pandas.html), which gives the data additional explicit structure.

In [None]:
# Column roles used when building the datasets
time_col = 'Week'
target_col = 'ts_MSCI_World'
# Feature columns: all renamed event columns followed by all renamed financial columns
past_feat_cols = [*rts_e_col_map.values(), *rts_f_col_map.values()]

#### To Pandas Dataset

In [None]:
# Target has to be a single value: https://ts.gluon.ai/stable/getting_started/models.html
pandas_dataset = PandasDataset(
    ts_df,
    target=target_col,  # single column name
    timestamp=time_col,
    freq='W',
    # `past_feat_cols` is the feature list defined next to time_col/target_col;
    # the previously used name `past_rts_col` is not defined anywhere in the notebook
    past_feat_dynamic_real=past_feat_cols,  # list of column names
)

#### To List Dataset

In [None]:
# Create an empty list to store the time series dictionaries
# NOTE(review): every DataFrame row is appended as its own dictionary with a
# one-element target, i.e. one entry per week instead of one entry holding the
# whole series — confirm this is the structure ListDataset / DeepAR should get.
time_series = []

# Iterate over the rows of the DataFrame
for _, row in ts_df.iterrows():
    start_time = row[time_col]
    target_value = row[target_col]
    # Plain Python list of this row's feature values
    past_feat_values = row[past_feat_cols].tolist()
    
    if pd.isna(target_value):  # Skip rows with missing target
        continue
        
    # Check for missing values in past_feat_values
    if any(np.isnan(past_feat_values)):
        # Handle missing values in past_feat_values
        # For example, you can replace missing values with 0 or impute them using an appropriate method
        # Note: after this call past_feat_values is a NumPy array, not a list
        past_feat_values = np.nan_to_num(past_feat_values, nan=0.0)

    # Create a time series dictionary and append it to the list
    # NOTE(review): the features are wrapped in an outer list of length 1, giving
    # a (1, num_features) layout — verify against the (num_features, time) layout
    # GluonTS presumably expects for "feat_dynamic_real".
    time_series.append({
        "start": start_time, 
        "target": [target_value], 
        "feat_dynamic_real": [past_feat_values]
    })


In [None]:
# Wrap the collected dictionaries in a GluonTS ListDataset with weekly frequency.
# The target has to be a single value: https://ts.gluon.ai/stable/getting_started/models.html
list_dataset = ListDataset(data_iter=time_series, freq='W')

#### Training 

In [None]:
# Choose the dataset representation used from here on:
# index 0 -> list_dataset, index 1 -> pandas_dataset
dataset = (list_dataset, pandas_dataset)[1]

In [None]:
# Split at an offset of 52 steps from the end into a training part and a test generator,
# then generate a single 52-step test window from the generator
training_data, test_gen = split(dataset, offset=-52)
test_data = test_gen.generate_instances(windows=1, prediction_length=52)

In [None]:
# Hyperparameters of this run; the same values are stored with the experiment record
prediction_length = 52  # forecast horizon
num_layers = 2          # passed to the estimator as num_layers
hidden_size = 40        # passed to the estimator as hidden_size
max_epochs = 10         # passed to the trainer as max_epochs

In [None]:
# Configure the DeepAR estimator
# NOTE(review): no feature-related arguments are passed here, so the covariate
# columns are presumably not used by the model — confirm.
estimator = TorchDeepAR(
    freq="W",
    prediction_length=prediction_length,
    num_layers=num_layers,  # Number of RNN layers (default: 2).
    hidden_size=hidden_size,  # Number of RNN cells for each layer (default: 40).
    trainer_kwargs=dict(max_epochs=max_epochs),
)

# Fit on the training split; the returned predictor is used for the backtest
predictor = estimator.train(training_data)

#### Evaluate Model

In [None]:
# Backtest: draw 100 sample paths per forecast and collect the matching ground-truth series
forecast_it, ts_it = make_evaluation_predictions(
    test_data.dataset,
    predictor=predictor,
    num_samples=100,
)

# Materialise both iterators so the results can be reused for plotting and evaluation
forecasts, tss = list(forecast_it), list(ts_it)

In [None]:
# Plot the true target values from 2016 onwards and overlay every forecast
plt.plot(tts_df[target_col]["2016":], color="black")
# Iterate the materialised `forecasts` list; `forecasts_it` is not defined anywhere
for forecast in forecasts:
    forecast.plot()
plt.legend(["True values"], loc="upper left", fontsize="xx-large");

In [None]:
# Evaluate the backtest at the 0.1, 0.5 and 0.9 quantiles
quantile_levels = [0.1, 0.5, 0.9]
evaluator = Evaluator(quantiles=quantile_levels)

# Aggregate metrics (dict-like, displayed below) and per-item metrics
agg_metrics, item_metrics = evaluator(tss, forecasts)
agg_metrics

In [None]:
# Store experiment run in database: configuration plus aggregate metrics
experiment_config = Forecast_Experiment(
    model='torch_deepar',
    target=target_col,
    # The feature list is `past_feat_cols` (`past_rts_col` is not defined in the
    # notebook); it is stored as its string representation
    past_rts_col=str(past_feat_cols),
    epochs=max_epochs,
    prediction_length=prediction_length,
    num_layers=num_layers,
    hidden_size=hidden_size,
    MASE=agg_metrics['MASE'],
    MAPE=agg_metrics['MAPE'],
    RMSE=agg_metrics['RMSE'],
    wQL_10=agg_metrics['wQuantileLoss[0.1]'],
    wQL_50=agg_metrics['wQuantileLoss[0.5]'],
    wQL_90=agg_metrics['wQuantileLoss[0.9]'],
    avg_wQL=agg_metrics['mean_wQuantileLoss'],
)
session.add(experiment_config)
session.commit()