In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pmdarima import auto_arima

import utils.AlphaVantageUtils as av
import utils.PostgresUtils as pg
import utils.ModelUtils as mdl

Connecting to the PostgreSQL database...
('PostgreSQL 11.5 on x86_64-pc-linux-gnu, compiled by gcc (Debian 7.3.0-5) 7.3.0, 64-bit',)


In [13]:
# Load the price rows for the `_TIC_MICROSOFT` ticker at the `_INT_DAILY` interval
# (the three trailing None arguments are passed through as-is; presumably optional
# filters -- confirm in PostgresUtils), then reshape them for modelling:
#   1. use the timestamp column as the index (set_index also removes it as a column),
#   2. discard the id / ticker / interval / volume columns,
#   3. order the rows by timestamp.
df_prices = (
    pg.get_prices(av._TIC_MICROSOFT, av._INT_DAILY, None, None, None)
    .set_index(pg._COL_DATETIME)
    .drop(columns=[pg._COL_ID, pg._COL_TICKER, pg._COL_INTERVAL, pg._COL_VOLUME])
    # The timestamp is no longer a column here; the sort key resolves to the
    # index level that carries the same name.
    .sort_values(by=pg._COL_DATETIME)
)

df_prices.head()

Unnamed: 0_level_0,open,high,low,close
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-20,107.06,109.69,105.87,106.0
2000-01-21,107.0,107.25,103.25,103.75
2000-01-24,103.8,105.69,100.81,101.25
2000-01-25,101.0,103.87,99.56,102.81
2000-01-26,102.44,103.5,99.12,99.37


In [17]:
# Partition the price frame with the project helper. The 0.1 argument is
# presumably the held-out fraction -- confirm in ModelUtils.
df_train, df_test = mdl.train_test_split(df_prices, 0.1)

# Only the closing-price column of each partition is used downstream.
train, test = (part[pg._COL_CLOSE] for part in (df_train, df_test))

In [18]:
# (rows, columns) of the full frame followed by each partition, as one tuple.
tuple(frame.shape for frame in (df_prices, df_train, df_test))

((5033, 4), (4530, 4), (503, 4))

In [23]:
# Earliest and latest timestamp of each partition, in the order
# (train min, train max, test min, test max).
# Index.min()/max() are vectorised; the Python builtins would walk the index
# one element at a time.
train.index.min(), train.index.max(), test.index.min(), test.index.max()

(Timestamp('2000-01-20 00:00:00'),
 Timestamp('2018-01-22 00:00:00'),
 Timestamp('2018-01-23 00:00:00'),
 Timestamp('2020-01-22 00:00:00'))

In [None]:
# Search for the best seasonal ARIMA order on the training closes; trace=True
# prints one line per candidate that is tried.
# NOTE(review): m=12 declares a 12-observation seasonal cycle. The series looks
# like one row per trading day, so confirm that this period is intended.
model = auto_arima(train, start_p=1, start_q=1, max_p=3, max_q=3, m=12, start_P=0, seasonal=True, d=1, D=1,
                   trace=True, error_action='ignore', suppress_warnings=True)
# auto_arima already returns the selected model fitted on `train`, so there is
# no separate model.fit(train) call: it would only repeat the most expensive step.

# One predicted value per held-out observation.
forecast_values = model.predict(n_periods=len(test))

# np.asarray strips any labels so the values are aligned to test.index by
# position. If predict() returns a labelled Series, handing it to DataFrame
# together with index=test.index would reindex by label instead.
forecast = pd.DataFrame({'prediction': np.asarray(forecast_values)}, index=test.index)

Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=12901.291, BIC=12933.369, Fit time=98.744 seconds
Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 0, 12); AIC=15790.643, BIC=15803.474, Fit time=0.929 seconds
Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 0, 12); AIC=14565.318, BIC=14590.981, Fit time=17.826 seconds
Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=12895.698, BIC=12921.360, Fit time=79.861 seconds
Near non-invertible roots for order (0, 1, 1)(0, 1, 1, 12); setting score to inf (at least one inverse root too close to the border of the unit circle: 1.000)
Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 0, 12); AIC=15788.645, BIC=15795.061, Fit time=1.048 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 0, 12); AIC=15593.828, BIC=15619.491, Fit time=38.242 seconds
Near non-invertible roots for order (1, 1, 1)(0, 1, 0, 12); setting score to inf (at least one inverse root too close to the border of the unit circle: 1.000)
Fit ARIMA: order=(

In [None]:
# Root-mean-square error of the forecast against the held-out closes.
# Both sides are converted to plain arrays so the subtraction is positional.
residuals = np.asarray(test) - np.asarray(forecast['prediction'])
rms = np.sqrt(np.mean(np.square(residuals)))
rms

In [None]:
# Overlay the training closes, the held-out closes and the model forecast on a
# single axis. Each line is labelled so the three series can be told apart.
fig, ax = plt.subplots(figsize=(16, 8))

ax.plot(train, label='train')
ax.plot(test, label='test')
ax.plot(forecast['prediction'], label='prediction')

ax.set(title='Close price: train, test and ARIMA prediction', xlabel='Date', ylabel='Close')
ax.legend();