https://www.sktime.net/en/stable/api_reference/forecasting.html

In [14]:
pip install sktime[all_extras]



In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [16]:
from sktime.forecasting.base import ForecastingHorizon

In [17]:
# Download the multi-coin price table, order the rows by the raw `ds` column,
# then parse `ds` into datetimes. The frame is shown as the cell output.
ts = (
    pd.read_csv('https://raw.githubusercontent.com/ranalytics/tsa-r/master/data/cryptos_price.csv')
    .sort_values(by='ds')
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)
ts

Unnamed: 0,y,ds,coin
7754,4.530000,2018-01-01,tezos
2114,2.390000,2018-01-01,xrp
2819,1.010000,2018-01-01,tether
3524,229.030000,2018-01-01,litecoin
4229,8.840000,2018-01-01,eos
...,...,...,...
4230,0.055028,2019-12-06,stellar
3525,2.740000,2019-12-06,eos
2820,45.670000,2019-12-06,litecoin
2115,1.000000,2019-12-06,tether


In [18]:
# Keep only the rows whose `coin` label is exactly 'bitcoin'.
is_bitcoin = ts['coin'].eq('bitcoin')
bc = ts.loc[is_bitcoin]

In [19]:
# Summarize the bitcoin subset: number of rows, column dtypes and non-null counts.
bc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 705 entries, 704 to 0
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   y       705 non-null    float64       
 1   ds      705 non-null    datetime64[ns]
 2   coin    705 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 22.0+ KB


In [20]:
# Build the target series: the `y` price column indexed by the `ds` timestamps.
# set_index returns a new frame, so the column object that `bc['y']` hands back
# never has its index reassigned in place and `bc` stays internally consistent.
# The result keeps the series name 'y' and an index named 'ds'.
y = bc.set_index('ds')['y']

In [21]:
# Squeeze the series and put it on an explicit daily ('D') frequency.
# NOTE(review): asfreq would presumably insert NaN for any calendar day that is
# missing from the data; the later split sizes (605 + 100 = 705 rows, the same
# count bc.info() reports) suggest none were added here — confirm.
y = y.squeeze().asfreq(freq='D')

In [22]:
# Show the resulting price series (indexed by `ds`) as the cell output.
y

Unnamed: 0_level_0,y
ds,Unnamed: 1_level_1
2018-01-01,13657.20
2018-01-02,14982.10
2018-01-03,15201.00
2018-01-04,15599.20
2018-01-05,17429.50
...,...
2019-12-02,7321.99
2019-12-03,7320.15
2019-12-04,7252.03
2019-12-05,7448.31


In [23]:
from sktime.datasets import load_airline
from sktime.forecasting.base import ForecastingHorizon
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from sktime.split import temporal_train_test_split
import dask.dataframe as dd

# Hold out the last TEST_SIZE observations of `y` as the evaluation window;
# everything before them is the training window.
TEST_SIZE = 100
y_train, y_test = temporal_train_test_split(y, test_size=TEST_SIZE)

# Sanity checks: the hold-out has the requested length and no rows were lost.
assert len(y_test) == TEST_SIZE, "unexpected hold-out length"
assert len(y_train) + len(y_test) == len(y), "split does not cover the series"


In [24]:
# Shape of the training split.
y_train.shape

(605,)

In [25]:
# Shape of the hold-out (test) split.
y_test.shape

(100,)

In [26]:
# Absolute forecasting horizon: the exact timestamps of the hold-out window.
test_index = y_test.index
fh = ForecastingHorizon(values=test_index, is_relative=False)
fh

ForecastingHorizon(['2019-08-29', '2019-08-30', '2019-08-31', '2019-09-01',
               '2019-09-02', '2019-09-03', '2019-09-04', '2019-09-05',
               '2019-09-06', '2019-09-07', '2019-09-08', '2019-09-09',
               '2019-09-10', '2019-09-11', '2019-09-12', '2019-09-13',
               '2019-09-14', '2019-09-15', '2019-09-16', '2019-09-17',
               '2019-09-18', '2019-09-19', '2019-09-20', '2019-09-21',
               '2019-09-22', '2019-09-23', '2019-09-24', '2019-09-25',
               '2019-09-26', '2019-09-27', '2019-09-28', '2019-09-29',
               '2019-09-30', '2019-10-01', '2019-10-02', '2019-10-03',
               '2019-10-04', '2019-10-05', '2019-10-06', '2019-10-07',
               '2019-10-08', '2019-10-09', '2019-10-10', '2019-10-11',
               '2019-10-12', '2019-10-13', '2019-10-14', '2019-10-15',
               '2019-10-16', '2019-10-17', '2019-10-18', '2019-10-19',
               '2019-10-20', '2019-10-21', '2019-10-22', '2019-10-23',
 

In [27]:
from sktime.forecasting.naive import NaiveForecaster, NaiveVariance
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.arima import ARIMA
from sktime.forecasting.sarimax import SARIMAX
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.tbats import TBATS

In [28]:
# Baseline: a drift-strategy naive forecaster wrapped in NaiveVariance, fitted
# on the training window and scored against the hold-out with MAPE.
forecaster_Naive = NaiveForecaster(strategy="drift")
variance_forecaster = NaiveVariance(forecaster=forecaster_Naive).fit(y_train)
y_pred_Naive = variance_forecaster.predict(fh=fh)
mean_absolute_percentage_error(y_test, y_pred_Naive)

0.11281570880498094

In [29]:
# Exponential smoothing with an additive trend and an additive seasonal term.
# NOTE(review): no seasonal period (`sp`) is passed although a seasonal
# component is requested — presumably it is inferred from the daily index; confirm.
exp_params = dict(trend='add', seasonal='additive')
forecaster_Exp = ExponentialSmoothing(**exp_params).fit(y_train)
y_pred_Exp = forecaster_Exp.predict(fh=fh)
mean_absolute_percentage_error(y_test, y_pred_Exp)




0.08585341708868707

In [30]:
# ARIMA with order (1, 0, 0) and seasonal order (1, 0, 0) at period 12.
# NOTE(review): a period of 12 on a daily series looks like a carry-over from
# a monthly example — confirm it is intended.
arima_params = {
    "order": (1, 0, 0),
    "seasonal_order": (1, 0, 0, 12),
    "suppress_warnings": True,
}
forecaster_ARIMA = ARIMA(**arima_params)
forecaster_ARIMA.fit(y_train)

# Forecast the hold-out horizon and report MAPE as the cell output.
y_pred_ARIMA = forecaster_ARIMA.predict(fh=fh)
mean_absolute_percentage_error(y_test, y_pred_ARIMA)

0.10880489206794264

In [31]:
# SARIMAX with order (1, 0, 0), trend="t" and seasonal order (1, 0, 0) at period 6.
# NOTE(review): the seasonal period of 6 is not motivated anywhere in the
# notebook — confirm it is intended for daily data.
sarimax_params = {
    "order": (1, 0, 0),
    "trend": "t",
    "seasonal_order": (1, 0, 0, 6),
}
forecaster_SARIMAX = SARIMAX(**sarimax_params)
forecaster_SARIMAX.fit(y_train)

# Forecast the hold-out horizon and report MAPE as the cell output.
y_pred_SARIMAX = forecaster_SARIMAX.predict(fh=fh)
mean_absolute_percentage_error(y_test, y_pred_SARIMAX)

0.2241704016728081

In [None]:
# AutoARIMA with differencing fixed at d=0 and the p / q orders capped at 3;
# fit on the training window, forecast the hold-out, report MAPE.
auto_arima_params = dict(d=0, max_p=3, max_q=3, suppress_warnings=True)
forecaster_autoARIMA = AutoARIMA(**auto_arima_params).fit(y_train)
y_pred_autoARIMA = forecaster_autoARIMA.predict(fh=fh)
mean_absolute_percentage_error(y_test, y_pred_autoARIMA)

TBATS is an acronym for:

Trigonometric seasonality

Box-Cox transformation

ARMA errors

Trend

Seasonal components

In [None]:
# TBATS with a trend component only: Box-Cox, damped trend and ARMA errors are
# all switched off, seasonal period 12, single worker.
# NOTE(review): sp=12 on a daily series looks like a carry-over from a monthly
# example — confirm it is intended.
tbats_params = {
    "use_box_cox": False,
    "use_trend": True,
    "use_damped_trend": False,
    "sp": 12,
    "use_arma_errors": False,
    "n_jobs": 1,
}
forecaster_TBATS = TBATS(**tbats_params)
forecaster_TBATS.fit(y_train)

# Forecast the hold-out horizon and report MAPE as the cell output.
y_pred_TBATS = forecaster_TBATS.predict(fh=fh)
mean_absolute_percentage_error(y_test, y_pred_TBATS)

In [None]:
# Draw the observed bitcoin prices (first column of `bc` against `ds`), then
# overlay every model's hold-out forecast as its own line.
fig = px.line(
    bc,
    x="ds",
    y=bc.columns[0],
    render_mode="svg",
    log_y=False,
)

# Trace label -> predicted series; traces are added in insertion order.
forecasts = {
    'Naive': y_pred_Naive,
    'ExpSmooth': y_pred_Exp,
    'ARIMA': y_pred_ARIMA,
    'SARIMAX': y_pred_SARIMAX,
    'autoARIMA': y_pred_autoARIMA,
    'TBATS': y_pred_TBATS,
}
for label, y_hat in forecasts.items():
    fig.add_scatter(x=y_hat.index, y=y_hat, mode='lines', name=label)

fig.show()

Почему не получается выстроить идеальный прогноз?

In [None]:
pip install hurst

In [None]:
# Estimate the Hurst exponent for the price series `y`.
# compute_Hc is called with kind='price' and simplified=True; its three results
# are unpacked as H, c and `data`, where `data` is later indexed as data[0]
# (x values) and data[1] (y values) by the plotting cell.
# NOTE(review): `random_walk` is imported but not used in the visible cells.
from hurst import compute_Hc, random_walk
H, c, data = compute_Hc(y, kind='price', simplified=True)

In [None]:
# Log-log scatter of the R/S values (data[1]) against the time intervals
# (data[0]), with the fitted curve c * interval**H drawn through them.
intervals = data[0]
rs_values = data[1]

f, ax = plt.subplots()
ax.plot(intervals, c * intervals**H, color="deepskyblue")
ax.scatter(intervals, rs_values, color="purple")
ax.set(
    xscale='log',
    yscale='log',
    xlabel='Time interval',
    ylabel='R/S ratio',
)
ax.grid(True)
plt.show()

# Report the fitted exponent and constant to four decimals.
print("H={:.4f}, c={:.4f}".format(H,c))

H = 0.5 — случайное блуждание,

0.5 < H < 1.0 — персистентный временной ряд,

0 < H < 0.5 — антиперсистентный временной ряд.


Временной ряд для приведенного примера является персистентным (устойчивым). Такой ряд характеризуется наличием долговременной памяти, которая обуславливает информационное влияние в течение больших периодов времени.