In [None]:
!pip install "gluonts[torch]"

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator
from scipy.interpolate import UnivariateSpline

In [None]:
# Helper that fills missing values using a smoothing spline.
def fill_missing_with_spline(series):
    """Return a copy of ``series`` with null entries replaced by spline estimates.

    A ``UnivariateSpline`` with smoothing factor ``s=2`` is fitted to the
    non-null observations, using the index cast to ``int64`` as the x-axis,
    and is then evaluated at the index positions of the null entries.

    Args:
        series: pandas Series whose index can be cast to ``int64`` (for
            example the daily date index built in ``deepar_fit_and_plot``).

    Returns:
        A new Series with the same index. The input is left untouched; when
        it contains no nulls an unchanged copy is returned and no spline is
        fitted.

    Note:
        The fit itself is delegated to ``UnivariateSpline``; presumably it
        needs several non-null points to succeed -- confirm against the SciPy
        documentation before using this on very short or mostly-empty series.
    """
    missing = series.isnull()

    # Nothing to interpolate: skip the spline fit entirely.
    if not missing.any():
        return series.copy()

    # Work on a copy so the caller's data is never modified in place.
    filled = series.copy()
    observed = series[~missing]

    # An explicit "int64" keeps the cast independent of the platform's
    # default integer width.
    spline = UnivariateSpline(
        observed.index.astype("int64"), observed.values, s=2
    )

    null_index = series.index[missing.to_numpy()]
    filled.loc[null_index] = spline(null_index.astype("int64"))
    return filled

# Fit a DeepAR model on several series and plot its rolling forecasts.
def deepar_fit_and_plot(
    dataframes,
    target='DCCrho',
    prediction_length=10,
    windows=24,
    max_epochs=10,
    xlim=('2023-03-01', '2023-12-31'),
):
    """Load series from CSV files, train DeepAR and plot forecasts per series.

    For every ``(name, filename)`` pair the CSV is read with its first column
    as a date index, re-sampled to daily frequency, and the gaps are filled
    with ``fill_missing_with_spline``. All series are stacked into one long
    dataframe (column ``source`` holds the series name) and wrapped in a
    ``PandasDataset``. The dataset is split at an offset of
    ``-(prediction_length * windows)`` and ``windows`` test instances of
    ``prediction_length`` steps each are generated. A ``DeepAREstimator`` is
    trained on the training part, and one figure per series is shown with the
    true values in blue and the forecasts in red.

    Args:
        dataframes: iterable of ``(name, filename)`` pairs; ``name`` is a
            string label used as item id and in plot titles, ``filename`` is
            the CSV path.
        target: name of the column to forecast.
        prediction_length: number of steps in each forecast window.
        windows: number of test windows generated from the held-out part.
        max_epochs: value passed as ``max_epochs`` in ``trainer_kwargs``.
        xlim: ``(start, end)`` pair accepted by ``pd.Timestamp`` limiting the
            x-axis of every plot, or ``None`` to leave the axis unrestricted.

    Returns:
        None. The figures are displayed with ``plt.show()``.

    Raises:
        ValueError: if ``dataframes`` is empty.
    """
    # Materialise once: the pairs are iterated again when plotting, which
    # would silently yield nothing for a generator.
    dataframes = list(dataframes)
    if not dataframes:
        raise ValueError("dataframes must contain at least one (name, filename) pair")

    # Load data from CSV files and apply spline interpolation
    frames = []
    for name, filename in dataframes:
        frame = pd.read_csv(filename, index_col=0, parse_dates=True)
        frame.index = pd.to_datetime(frame.index)
        frame = (
            frame.asfreq('D')
            .apply(fill_missing_with_spline, axis=0)
            # Name the index explicitly so the 'Date' column exists after
            # reset_index() regardless of the header used in the CSV.
            .rename_axis('Date')
            .reset_index()
        )
        frame['source'] = name
        frames.append(frame)

    # Combine the dataframes vertically
    df = pd.concat(frames, ignore_index=True).sort_values('Date')

    dataset = PandasDataset.from_long_dataframe(
        dataframe=df,
        target=target,
        item_id='source',
        timestamp='Date',
        freq='D')

    # The held-out tail is sized to hold exactly `windows` windows of
    # `prediction_length` steps (240 observations with the defaults).
    training_data, test_gen = split(dataset, offset=-prediction_length * windows)
    test_data = test_gen.generate_instances(
        prediction_length=prediction_length, windows=windows)

    # Train the model
    model = DeepAREstimator(
        prediction_length=prediction_length,
        freq="D",
        dropout_rate=0.5,
        num_layers=4,
        lr=0.02,
        trainer_kwargs={"max_epochs": max_epochs}
    ).train(training_data)

    forecasts = list(model.predict(test_data.input))

    # Plot predictions: one figure per series
    for name, _ in dataframes:
        plt.figure(figsize=(12, 6))
        series_df = df[df['source'] == name]
        plt.plot(series_df['Date'], series_df[target], color='blue',
                 label='True values (' + name + ')')
        for forecast in forecasts:
            if forecast.item_id == name:
                # Draws onto the current figure created above.
                forecast.plot(color='red')

        # Limiting x-axis
        if xlim is not None:
            plt.xlim(pd.Timestamp(xlim[0]), pd.Timestamp(xlim[1]))

        plt.legend(fontsize="small")
        plt.title(name + ' Predictions')
        plt.show()

In [None]:
# Example usage.
# Every entry pairs a series label with a CSV file. Each file is read with its
# first column (the dates) as the index and must provide a 'DCCrho' column
# holding the DCC correlation estimates.
dataframes = [
    ('df1', 'DCCrho_bba_petr.csv'),
    ('df2', 'DCCrho_bba_ypf.csv'),
]

deepar_fit_and_plot(dataframes)