In [3]:
import pandas as pd
import sktime
import os
import sys

rootdir = "../"
sys.path.append(rootdir)

from data_opener import open_dataset_and_ground_truth

The problem with sktime is that it assumes that the model will be trained on the data right before the predicted sequence. A model cannot be trained, then applied to a completely different dataset. Only forward is possible.

This means that an evaluation setting to get a list of y_pred, would require fitting as many models with horizon 1 as there is test variables... Which is way too costly.

Perhaps sktime regressors would be more suited to my approach.

Just in case, here are some interesting sktime forecaster, that admit exogeneous variables.
 - https://www.sktime.net/en/latest/api_reference/auto_generated/sktime.forecasting.arch.StatsForecastGARCH.html
 - https://www.sktime.net/en/latest/api_reference/auto_generated/sktime.forecasting.trend.STLForecaster.html
 - https://www.statsmodels.org/dev/generated/statsmodels.tsa.statespace.dynamic_factor_mq.DynamicFactorMQ.html#statsmodels.tsa.statespace.dynamic_factor_mq.DynamicFactorMQ
 - https://www.sktime.net/en/latest/api_reference/auto_generated/sktime.forecasting.trend.PolynomialTrendForecaster.html#sktime.forecasting.trend.PolynomialTrendForecaster.fit_predict
 - https://www.sktime.net/en/latest/api_reference/auto_generated/sktime.forecasting.fbprophet.Prophet.html#prophet

As for regressors, they look like an appropriate solution



In [4]:
df, _, _, _ = open_dataset_and_ground_truth("fMRI_processed_by_Nauta/returns/our_selection","timeseries1.csv", rootdir = "../")

In [12]:
df

Unnamed: 0,0,1,2,3,4
0,-0.678524,-0.556295,-0.299887,-1.173443,-0.437786
1,-0.293738,-0.240375,-1.133090,-0.241252,0.457178
2,-0.507616,-0.338707,-0.325274,-0.411633,0.036790
3,1.088598,-1.029587,-0.413861,-0.043702,-0.834169
4,1.387013,0.022765,0.608978,-0.677545,-0.405465
...,...,...,...,...,...
195,0.575230,0.528570,0.454471,0.945913,1.363937
196,1.959348,0.842827,-0.848520,1.492368,1.683120
197,0.524308,0.211892,-0.178575,0.498877,0.708297
198,-1.232276,-0.653899,-0.882367,0.289989,-0.022106


In [13]:
from sktime.forecasting.fbprophet import Prophet

In [15]:
forecaster = Prophet(
    seasonality_mode='multiplicative',
    )
forecaster.fit(df["0"])  

16:24:57 - cmdstanpy - INFO - Chain [1] start processing
16:24:57 - cmdstanpy - INFO - Chain [1] done processing


In [16]:
forecaster.predict(fh=[1])

200   -0.032439
Name: 0, dtype: float64

In [22]:
forecaster.get_fitted_params()

{'k': -0.025913,
 'm': 0.0139355,
 'sigma_obs': 0.368582,
 'delta': array([ 4.48968e-09,  1.73753e-04,  3.99225e-10,  3.35399e-09,
        -3.22905e-09,  5.21945e-09,  9.03097e-09,  5.31525e-09,
         1.89466e-09, -4.57523e-09, -1.09110e-11,  3.60884e-09,
         4.56482e-09, -5.24390e-09,  7.96387e-09,  1.90885e-09,
         8.34667e-09,  4.39689e-10,  3.35093e-09,  3.56063e-09,
        -9.83922e-10,  2.66479e-09, -3.41292e-10, -5.96008e-09,
         3.12992e-09]),
 'beta': array([-4.51561e-03, -6.86330e-06, -2.60030e-03,  5.77618e-03,
         3.26588e-03,  7.12572e-04])}

In [31]:
a = forecaster.predict_residuals()

In [32]:
b = forecaster.predict_residuals(y=df["1"])

In [33]:
a - b + df["1"]-df["0"]

0      0.000000e+00
1     -5.551115e-17
2      0.000000e+00
3      2.220446e-16
4     -2.220446e-16
           ...     
195    0.000000e+00
196   -2.220446e-16
197   -1.110223e-16
198   -2.220446e-16
199    0.000000e+00
Length: 200, dtype: float64

## Regression

In [5]:
from sktime.regression.kernel_based import RocketRegressor

In [7]:
X_train, y_train = df[["0","1","2","3"]][:150], df["4"][:150]
X_test, y_test = df[["0","1","2","3"]][150:], df["4"][150:]
reg = RocketRegressor(num_kernels=500)
reg.fit(X_train.values, y_train) 

In [10]:
reg.predict(X_test.values) - y_test

150   -0.075437
151    0.186735
152   -1.186694
153   -1.840561
154    0.993437
155    1.153353
156    0.885528
157    0.341837
158   -1.267892
159   -0.319106
160   -0.461265
161   -3.132581
162   -0.766755
163    1.930694
164    0.381987
165   -2.165193
166   -0.631257
167    0.661356
168    0.164489
169    0.015012
170    0.288475
171    0.075576
172    0.913179
173    0.010503
174    0.908372
175    1.004148
176   -0.089637
177   -1.966492
178   -1.459564
179   -1.227624
180    0.306456
181    2.914213
182    1.288950
183   -1.033642
184    1.138677
185    1.302828
186   -0.697739
187   -0.838609
188   -0.078849
189   -0.258168
190    1.241632
191    0.382624
192   -0.702680
193   -1.338314
194    1.618735
195   -1.618249
196   -1.606648
197   -0.856484
198   -0.168296
199    1.420096
Name: 4, dtype: float64