In [1]:
!pip install pmdarima



In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
import plotly.express as px
import pmdarima as pm
import yfinance as yf
from sklearn import metrics

In [3]:
# Pull the GLD price history from Yahoo Finance, starting on the first day of
# 2023. No end date is given, so the frame depends on when the cell is run.
df = yf.download(tickers='GLD', start='2023-01-01')
# Peek at the five most recent rows to confirm the download worked.
df.tail(5)

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-08-21,175.899994,175.940002,174.960007,175.809998,175.809998,4206400
2023-08-22,175.830002,176.25,175.309998,176.100006,176.100006,5892500
2023-08-23,177.320007,178.220001,177.070007,177.889999,177.889999,7949100
2023-08-24,177.660004,178.490005,177.410004,177.850006,177.850006,5526400
2023-08-25,177.580002,178.380005,176.630005,177.619995,177.619995,6132800


In [4]:
# Line chart of the closing price over the whole downloaded period, with the
# frame's index on the x-axis.
close_fig = px.line(data_frame=df, x=df.index, y=df['Close'])
close_fig

In [5]:
# Hold out the most recent n_test rows as the test set; every earlier row is
# used for training. The split is chronological, not shuffled.
n_test = 10
train = df.head(-n_test)
test = df.tail(n_test)

In [6]:
# Let auto_arima choose the ARIMA order for the training closes.
model = pm.auto_arima(
    train['Close'],
    seasonal=False,            # non-seasonal candidates only
    stepwise=False,            # search the candidate grid instead of the stepwise shortcut
    trace=True,                # print each candidate with its AIC and fit time
    suppress_warnings=True,
    error_action='ignore',
)

 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=572.550, Time=0.02 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=574.240, Time=0.05 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=572.055, Time=0.06 sec
 ARIMA(0,1,3)(0,0,0)[0] intercept   : AIC=573.355, Time=0.08 sec
 ARIMA(0,1,4)(0,0,0)[0] intercept   : AIC=571.263, Time=0.13 sec
 ARIMA(0,1,5)(0,0,0)[0] intercept   : AIC=566.721, Time=0.38 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=574.356, Time=0.09 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=575.162, Time=0.22 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=573.831, Time=0.36 sec
 ARIMA(1,1,3)(0,0,0)[0] intercept   : AIC=569.181, Time=0.40 sec
 ARIMA(1,1,4)(0,0,0)[0] intercept   : AIC=569.232, Time=0.41 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=570.891, Time=0.14 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=572.860, Time=0.27 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=569.095, Time=0.30 sec
 ARIMA(2,1,3)(0,0,0)[0] intercept   : AIC=566.830, Time=0.45 sec
 ARIMA(3,1,0)(0,0,0)[0] i

In [7]:
# Summary
# Show the fit report of the selected model: information criteria, the
# estimated coefficients with their standard errors and p-values, and the
# residual diagnostics.
model.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,153.0
Model:,"SARIMAX(0, 1, 5)",Log Likelihood,-276.361
Date:,"Mon, 28 Aug 2023",AIC,566.721
Time:,13:21:00,BIC,587.889
Sample:,0,HQIC,575.32
,- 153,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept,0.0457,0.153,0.299,0.765,-0.254,0.345
ma.L1,-0.0825,0.080,-1.025,0.305,-0.240,0.075
ma.L2,-0.1649,0.082,-2.006,0.045,-0.326,-0.004
ma.L3,0.0393,0.081,0.488,0.625,-0.119,0.197
ma.L4,0.1442,0.068,2.112,0.035,0.010,0.278
ma.L5,0.2172,0.087,2.502,0.012,0.047,0.387
sigma2,2.2155,0.237,9.367,0.000,1.752,2.679

0,1,2,3
Ljung-Box (L1) (Q):,0.0,Jarque-Bera (JB):,5.31
Prob(Q):,0.95,Prob(JB):,0.07
Heteroskedasticity (H):,0.38,Skew:,0.27
Prob(H) (two-sided):,0.0,Kurtosis:,3.74


In [8]:
# Forecast as many steps ahead as there are held-out rows. Besides the point
# forecasts (test_pred), request the confidence bounds (conf), which later
# cells read as conf[:, 0] for the lower and conf[:, 1] for the upper bound.
test_pred, conf = model.predict(
    return_conf_int=True,
    n_periods=n_test,
)


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.



In [9]:
# Compare the forecast with the held-out closes and shade the band between
# the upper and lower confidence bounds.
import plotly.graph_objects as go

forecast_dates = test.index
# 'Lower' uses fill='tonexty' and is listed directly after 'Upper', so the
# shaded area spans the two bounds. Other accepted fill values are:
# 'none', 'tozeroy', 'tozerox', 'tonextx', 'toself', 'tonext'.
traces = [
    go.Scatter(x=forecast_dates, y=test['Close'], name='Test Data'),
    go.Scatter(x=forecast_dates, y=test_pred, name='Forecast'),
    go.Scatter(x=forecast_dates, y=conf[:,1], name='Upper'),
    go.Scatter(x=forecast_dates, y=conf[:,0], fill='tonexty', name='Lower'),
]
fig = go.Figure(data=traces)
fig.show()

In [10]:
# Model parameters
# Fetch the selected model's settings as a dict and display it. The 'order'
# entry is read again further down to obtain the differencing order d.
params = model.get_params()
params

{'maxiter': 50,
 'method': 'lbfgs',
 'order': (0, 1, 5),
 'out_of_sample_size': 0,
 'scoring': 'mse',
 'scoring_args': {},
 'seasonal_order': (0, 0, 0, 0),
 'start_params': None,
 'trend': None,
 'with_intercept': True}

In [11]:
# Overlay the in-sample (train) predictions and the out-of-sample (test)
# forecast, with its confidence band, on the full closing-price series.
import plotly.graph_objects as go

# d is the differencing order: the middle entry of the model's (p, d, q) order.
d = params['order'][1]
# In-sample predictions run from observation d to the last training
# observation, so the result has d fewer values than train has rows.
train_pred = model.predict_in_sample(start=d, end=-1)

fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Close'], name='Close'))
# Drop the first d training dates so each prediction is drawn on the date it
# belongs to. Pairing train_pred with the full train.index gives x and y
# different lengths and shifts the whole curve d sessions too early.
fig.add_trace(go.Scatter(x=train.index[d:], y=train_pred, name='Train Forecast'))
fig.add_trace(go.Scatter(x=test.index, y=test_pred, name='Test Forecast'))
# 'Lower' uses fill='tonexty' and directly follows 'Upper', so the shaded
# area spans the confidence band.
fig.add_trace(go.Scatter(x=test.index, y=conf[:,1], name='Upper'))
fig.add_trace(go.Scatter(x=test.index, y=conf[:,0], fill='tonexty', name='Lower'))
fig.show()

In [12]:
# Score the forecast against the held-out closes with three error measures.
y_true = test['Close']
scorers = (
    ('MSE: ', metrics.mean_squared_error),
    ('MAPE: ', metrics.mean_absolute_percentage_error),
    ('R^2: ', metrics.r2_score),
)
for label, score_fn in scorers:
    print(label, score_fn(y_true, test_pred))

MSE:  1.3616689122251924
MAPE:  0.005872858819765505
R^2:  -0.4126466017612347
