In [None]:
# !pip install pmdarima

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from pmdarima.arima import auto_arima # лучше всего для временных рядов
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
import seaborn as sns

In [None]:
# Load the quotes indexed by date. Gaps are forward-filled in chronological
# order rather than replaced with 0: a zero price is not a real observation,
# it becomes -inf under the log transform used later in this notebook and is
# not valid input for a multiplicative decomposition.
stock_data = (
    pd.read_csv('data/stock.csv', sep=',', index_col='Date', parse_dates=['Date'])
    .sort_index()              # make sure "forward" really means forward in time
    .ffill()
    .dropna(subset=['Close'])  # rows before the first known Close cannot be filled
)

In [None]:
# Preview the first rows to sanity-check the parsed columns and the date index.
stock_data.head()

In [None]:
# Closing price over time. The Series is passed through the explicit `data=`
# keyword so the call does not depend on which parameter seaborn treats as the
# first positional one.
close_ax = sns.lineplot(data=stock_data.Close)
close_ax.set(title='Closing price', xlabel='Date', ylabel='Close');

In [None]:
# Stationarity check
def test_stationarity(timeseries, window=10):
    """Plot rolling statistics of a series and print its Augmented Dickey-Fuller test.

    Parameters
    ----------
    timeseries : pandas.Series
        Series to examine; it must support ``.rolling`` and ``.dropna``.
    window : int, default 10
        Number of observations in the rolling window used for the mean and
        standard deviation curves.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the test summary is printed.
    """
    rolling = timeseries.rolling(window)
    rolmean = rolling.mean()
    rolstd = rolling.std()

    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean and Standard Deviation')
    plt.show()

    print('Тест Дики-Фуллера')
    # Missing values are dropped first so that a series with gaps (for example
    # a differenced one) can still be tested.
    adft = adfuller(timeseries.dropna())

    # The first four entries of the result are labelled below; the fifth is a
    # mapping of critical values that is appended one entry per key.
    output = pd.Series(
        adft[0:4],
        index=['Test Statistics', 'p-value', 'No. of lags used', 'Number of observations used'],
    )
    for key, values in adft[4].items():
        output['critical value (%s)' % key] = values
    print(output)
    
# Run the stationarity check on the raw closing prices.
test_stationarity(stock_data.Close)

# Приводим к стационарности

In [None]:
# Decompose the closing price with a multiplicative model and a period of
# 30 observations (model may also be 'additive').
result = seasonal_decompose(stock_data.Close, model='multiplicative', period=30)
# result.plot() returns the figure it draws on, so no plt.figure() call is
# made beforehand: that would only leave an extra, empty figure behind.
fig = result.plot()
fig.set_size_inches(16, 9)

In [None]:
# Trend-removal step (per the original note): work with the natural log of the
# closing price and keep its 10-observation rolling mean and standard deviation.
df_log = np.log(stock_data['Close'])
moving_avg = df_log.rolling(window=10).mean()
std_dev = df_log.rolling(window=10).std()

In [None]:
# Log series together with its rolling mean and rolling standard deviation.
plt.title('Moving Average')
plt.plot(df_log, color="green", label="log")
plt.plot(std_dev, color="black", label="Standard Deviation")
plt.plot(moving_avg, color="red", label="Mean")
# The legend is created once, after every labelled line exists; calling it
# before plotting has no labelled artists to show.
plt.legend(loc='best')
plt.show()

In [None]:
# Chronological split of the log series: first 90% for training, last 10% held out.
split_idx = int(len(df_log) * 0.9)
# NOTE(review): the first 3 observations are left out of the training slice,
# as in the original code; the reason is not evident here - confirm.
train_data, test_data = df_log.iloc[3:split_idx], df_log.iloc[split_idx:]

plt.xlabel('Dates')
plt.ylabel('Closing Prices')
# Plot the training slice itself so the 'Train data' label matches what is drawn.
plt.plot(train_data, 'green', label='Train data')
plt.plot(test_data, 'blue', label='Test data')
plt.legend()
plt.show()

# ARIMA

In [None]:
# Automatic ARIMA order search on the training part of the log series.
model_autoARIMA = auto_arima(
    train_data,
    start_p=0, start_q=0,
    test='adf',        # test used to find the optimal d
    max_p=3, max_q=3,  # upper bounds for p and q
    m=1,               # period for the seasonality search (seasonality is not searched)
    d=None,            # left unset so that it is determined with the ADF test
    start_P=0,
    D=0,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)
print(model_autoARIMA.summary())

In [None]:
# Fit the selected model on the training data.
# NOTE(review): auto_arima was already given train_data above and is documented
# to return a fitted estimator, so this re-fit looks redundant - confirm.
model_autoARIMA.fit(train_data)

In [None]:
# Forecast as many steps ahead as there are hold-out observations.
n_test_periods = len(test_data)
forecast = model_autoARIMA.predict(n_periods=n_test_periods)

In [None]:
# Align the forecast with the hold-out dates. The values are wrapped in a new
# Series instead of assigning to `.index`, so this also works when predict()
# returns a plain array, which has no settable index.
forecast = pd.Series(np.asarray(forecast), index=test_data.index)

In [None]:
# Display the forecast values, indexed by the test-period dates.
forecast

In [None]:
# Training data, hold-out data and the forecast on one set of axes.
forecast_fig, forecast_ax = plt.subplots(figsize=(10, 5), dpi=100)
forecast_ax.plot(train_data, label='training data')
forecast_ax.plot(test_data, color='blue', label='Actual Stock Price')
forecast_ax.plot(forecast, color='orange', label='Predicted Stock Price')
forecast_ax.set_title('ARCH CAPITAL GROUP Stock Price Prediction')
forecast_ax.set_xlabel('Time')
forecast_ax.set_ylabel('ARCH CAPITAL GROUP Stock Price')
forecast_ax.legend(loc='upper left', fontsize=8)
plt.show()

In [None]:
# Hold-out error of the forecast as an (MSE, MAE) pair; both series are on the log scale.
(
    mean_squared_error(test_data, forecast),
    mean_absolute_error(test_data, forecast),
)