#  Notebook to fit and plot DeepAR for time series forecasting.

Written by Baris Yazici as part of the Practitioners Challenge 2024 at LSE.

# Libraries

In [None]:
# Run the line below if GluonTS (with the PyTorch backend) is not installed on your computer.
# pip install "gluonts[torch]"

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator

from scipy.interpolate import UnivariateSpline

# Helper Functions
1.   The function "fill_missing_with_spline" handles missing values in time series using splines.
2.   The function "prepare_dataframes_from_uploaded_files" extracts the file names (without extension) from the uploaded CSV files and pairs each one with its full file name.


In [None]:
def fill_missing_with_spline(series):
    """Return a copy of *series* with missing values filled by a smoothing spline.

    A ``UnivariateSpline`` with smoothing factor ``s=2`` is fitted to the
    non-null observations, using the index cast to ``int`` as the x-axis,
    and is then evaluated at the index positions of the null entries.

    Args:
        series: pandas Series whose index can be cast to ``int`` and is
            increasing (the spline requires increasing x values).

    Returns:
        A new Series with the same index in which every null entry has been
        replaced by the spline estimate. The input Series is not modified.
        If there are no null entries, an unchanged copy is returned and no
        spline is fitted.
    """
    missing = series.isnull()
    # Work on a copy: this function is used with DataFrame.apply, where
    # mutating the object that was passed in can give surprising results.
    filled = series.copy()
    if not missing.any():
        # Nothing to interpolate; skipping the fit also avoids failing on
        # short series that have too few points for a spline.
        return filled

    observed = series[~missing]
    # Fit the spline on the observed points only.
    spline = UnivariateSpline(observed.index.astype(int), observed.values, s=2)
    # Evaluate the spline where data is missing and write into the copy.
    filled.loc[missing] = spline(series.index[missing].astype(int))
    return filled

In [None]:
import os
from google.colab import files

def prepare_dataframes_from_uploaded_files(uploaded_files):
    """Pair each uploaded file name with its extension-less stem.

    Args:
        uploaded_files: mapping whose keys are file names; the values are
            not used here.

    Returns:
        A list of ``(name_without_extension, name)`` tuples, one per key,
        in the mapping's iteration order.
    """
    return [
        (os.path.splitext(file_name)[0], file_name)
        for file_name, _ in uploaded_files.items()
    ]

# Function to Fit and Plot DeepAR

In [None]:
def deepar_fit_and_plot(
    dataframes,
    *,
    target='DCCrho',
    timestamp='Date',
    prediction_length=10,
    windows=24,
    max_epochs=10,
    xlim=('2023-03-01', '2023-12-31'),
):
    """Fit one DeepAR model on several CSV time series and plot its forecasts.

    Each CSV is read with its first column as a date index, resampled to
    daily frequency, gap-filled with ``fill_missing_with_spline`` and tagged
    with its series name. All series are stacked into one long dataframe,
    the final ``prediction_length * windows`` steps are held out, a DeepAR
    model is trained on the rest, and one figure per series is shown with
    the true values and the rolling forecasts.

    Args:
        dataframes: list of ``(name, filename)`` tuples, e.g. the output of
            ``prepare_dataframes_from_uploaded_files``. It is iterated twice,
            so it must be a re-iterable sequence.
        target: name of the column to forecast.
        timestamp: name of the date column after ``reset_index``; the CSV's
            first column must therefore carry this header.
        prediction_length: number of steps forecast per window.
        windows: number of rolling test windows.
        max_epochs: training epochs passed to the trainer.
        xlim: ``(start, end)`` dates for the x-axis of every plot, or
            ``None`` to keep matplotlib's automatic limits.

    Raises:
        ValueError: if ``dataframes`` is empty.
    """
    if not dataframes:
        raise ValueError("dataframes is empty: provide at least one (name, filename) pair")

    # Load data from CSV files and apply spline interpolation
    frames = []
    for name, filename in dataframes:
        df = pd.read_csv(filename, index_col=0, parse_dates=True)
        # parse_dates leaves the index untouched when it cannot parse it;
        # converting explicitly makes a bad date column fail loudly here.
        df.index = pd.to_datetime(df.index)
        # Insert rows for missing calendar days so they can be interpolated.
        df = df.asfreq('D')
        df = df.apply(fill_missing_with_spline, axis=0)
        df.reset_index(inplace=True)
        df['source'] = name
        frames.append(df)

    # Combine the dataframes vertically
    df = pd.concat(frames, ignore_index=True).sort_values(timestamp)

    dataset = PandasDataset.from_long_dataframe(
        dataframe=df,
        target=target,
        item_id='source',
        timestamp=timestamp,
        freq='D')

    # Hold out exactly the span covered by the rolling test windows
    # (10 * 24 = 240 steps with the defaults).
    training_data, test_gen = split(dataset, offset=-(prediction_length * windows))
    test_data = test_gen.generate_instances(
        prediction_length=prediction_length, windows=windows)

    # Train the model
    model = DeepAREstimator(
        prediction_length=prediction_length,
        freq="D",
        dropout_rate=0.5,
        num_layers=4,
        lr=0.02,
        trainer_kwargs={"max_epochs": max_epochs}
    ).train(training_data)

    forecasts = list(model.predict(test_data.input))

    # Plot predictions: one figure per input series
    for name, _ in dataframes:
        plt.figure(figsize=(12, 6))
        df_temp = df[df['source'] == name]
        plt.plot(df_temp[timestamp], df_temp[target], color='#377eb8', label='True values (' + name + ')')
        for forecast in forecasts:
            if forecast.item_id == name:
                forecast.plot(color='#e41a1c')
        # we chose colors from https://gist.github.com/thriveth/8560036
        # a color blind/friendly color cycle for Matplotlib line plots.

        # Limiting x-axis to the requested date range
        if xlim is not None:
            plt.xlim(pd.Timestamp(xlim[0]), pd.Timestamp(xlim[1]))

        plt.legend(fontsize="small")
        plt.title(name + ' Predictions')
        plt.show()

# Example Use

In [None]:
# Upload CSV files whose first column holds the dates (header "Date") and
# which contain a column called "DCCrho" with the correlations to forecast.
# files.upload() comes from google.colab, so this cell is meant to run in Colab.
uploaded = files.upload()

# Build the list of (name, filename) pairs that deepar_fit_and_plot expects
dataframes = prepare_dataframes_from_uploaded_files(uploaded)

# Run the function to fit the model and see the plots
deepar_fit_and_plot(dataframes)