In this notebook, I tune the hyperparameters of the ARIMA model to predict the time series values. The metric of evaluation is the root mean squared error of the predicted price of bitcoin.

In [1]:
import pandas as pd
import numpy as np
from datetime import date
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings("ignore")
bitcoin =pd.read_csv('bitcoin.csv').drop('Unnamed: 0',axis=1)
bitcoin['Date'] = pd.to_datetime(bitcoin['Date'])
bitcoin.set_index('Date',inplace=True)
bits_log_shift = (np.log(bitcoin['Price']) - np.log(bitcoin['Price']).shift()).dropna()
ethereum = pd.read_csv('ethereum.csv').drop('Unnamed: 0',axis=1)
ethereum['Date'] = pd.to_datetime(ethereum['Date'])
ethereum.set_index('Date',inplace=True)
eth_log_shift = (np.log(ethereum['Price']) - np.log(ethereum['Price']).shift()).dropna()

  from pandas.core import datetools


In [3]:
def evaluate_arima_model(original_series,stationary_series,parameters, days_out=7):
    # Here we predict the noise
    train = stationary_series[:-days_out]
    # test the model on the last N points of the data
    test = stationary_series[-days_out:]
    #
    history = [x for x in train]

    train.dropna(inplace=True)
    test.dropna(inplace=True)
    predicted_values = []
    tested = []
    for i in range(len(test)):
        model = ARIMA(history, order=parameters)
        model_fit = model.fit(disp=0)
        yhat = float(model_fit.forecast(steps=1)[0])
        predicted_values.append(yhat)
        tested_values = list(test)[i]
        tested.append(tested_values)
        history.append(tested_values)
    predictions_series = pd.Series(predicted_values, index = test.index)
    
    # This part couples the signal to the noise.
    a = original_series.loc[original_series.index[-(days_out+1):]]['Price']
    b = np.exp(predictions_series)
    full_predictions = pd.DataFrame(a*b,columns=['Predicted with ARIMA']).dropna()
    df = pd.concat([original_series.loc[original_series.index[-days_out:]],full_predictions],axis=1)
    error = mean_squared_error(df['Price'],df['Predicted with ARIMA'])
    #print("ARIMA Root Mean Squared Error: ",error)
    #return df,error
    return error

In [4]:
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(original_series,stationary_series, p_values, d_values, q_values):
    original_series = original_series.astype('float32')
    stationary_series = stationary_series.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p,d,q)
                try:
                    rmse = np.sqrt(evaluate_arima_model(original_series,stationary_series, order))
                    if rmse < best_score:
                        best_score, best_cfg = rmse, order
                    print('ARIMA%s RMSE=%.3f' % (order,rmse))
                except:
                    continue
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
 
# evaluate parameters
p_values = [0, 1, 2, 4, 6, 8, 10]
d_values = range(0, 3)
q_values = range(0, 3)
warnings.filterwarnings("ignore")

In [5]:
evaluate_models(bitcoin,bits_log_shift, p_values, d_values, q_values)

ARIMA(0, 0, 0) RMSE=19.434
ARIMA(0, 0, 1) RMSE=22.524
ARIMA(0, 0, 2) RMSE=18.781
ARIMA(0, 1, 0) RMSE=112.209
ARIMA(0, 1, 1) RMSE=3.942
ARIMA(0, 1, 2) RMSE=11.408
ARIMA(0, 2, 0) RMSE=200.885
ARIMA(0, 2, 1) RMSE=114.265
ARIMA(1, 0, 0) RMSE=21.311
ARIMA(1, 0, 1) RMSE=24.669
ARIMA(1, 0, 2) RMSE=18.766
ARIMA(1, 1, 0) RMSE=89.173
ARIMA(1, 2, 0) RMSE=167.451
ARIMA(2, 0, 0) RMSE=18.628
ARIMA(2, 0, 1) RMSE=18.741
ARIMA(2, 0, 2) RMSE=19.912
ARIMA(2, 1, 0) RMSE=72.985
ARIMA(2, 1, 1) RMSE=15.637
ARIMA(2, 1, 2) RMSE=10.980
ARIMA(2, 2, 0) RMSE=157.780
ARIMA(2, 2, 2) RMSE=42.486
ARIMA(4, 0, 0) RMSE=21.567
ARIMA(4, 0, 1) RMSE=25.168
ARIMA(4, 0, 2) RMSE=27.245
ARIMA(4, 1, 0) RMSE=55.667
ARIMA(4, 1, 1) RMSE=31.193
ARIMA(4, 1, 2) RMSE=38.951
ARIMA(4, 2, 0) RMSE=105.909
ARIMA(6, 0, 0) RMSE=26.481
ARIMA(6, 0, 1) RMSE=27.145
ARIMA(6, 0, 2) RMSE=30.012
ARIMA(6, 1, 0) RMSE=55.074
ARIMA(6, 1, 1) RMSE=49.769
ARIMA(6, 1, 2) RMSE=24.589
ARIMA(6, 2, 0) RMSE=109.112
ARIMA(8, 0, 0) RMSE=26.960
ARIMA(8, 0, 1) RMSE=26

In [8]:
evaluate_models(ethereum,eth_log_shift, p_values, d_values, q_values)

ARIMA(0, 0, 0) RMSE=2.209
ARIMA(0, 0, 1) RMSE=1.837
ARIMA(0, 0, 2) RMSE=1.841
ARIMA(0, 1, 0) RMSE=12.922
ARIMA(0, 1, 1) RMSE=4.876
ARIMA(0, 2, 0) RMSE=25.888
ARIMA(0, 2, 1) RMSE=11.968
ARIMA(1, 0, 0) RMSE=1.851
ARIMA(1, 0, 1) RMSE=1.837
ARIMA(1, 1, 0) RMSE=8.110
ARIMA(1, 1, 1) RMSE=5.046
ARIMA(1, 1, 2) RMSE=5.031
ARIMA(1, 2, 0) RMSE=14.534
ARIMA(1, 2, 2) RMSE=3.499
ARIMA(2, 0, 0) RMSE=1.809
ARIMA(2, 0, 1) RMSE=2.151
ARIMA(2, 1, 0) RMSE=8.317
ARIMA(2, 1, 1) RMSE=4.917
ARIMA(2, 1, 2) RMSE=4.435
ARIMA(2, 2, 0) RMSE=12.330
ARIMA(2, 2, 2) RMSE=4.207
ARIMA(4, 0, 0) RMSE=2.569
ARIMA(4, 0, 1) RMSE=2.483
ARIMA(4, 0, 2) RMSE=2.961
ARIMA(4, 1, 0) RMSE=7.495
ARIMA(4, 1, 1) RMSE=4.505
ARIMA(4, 1, 2) RMSE=4.623
ARIMA(4, 2, 0) RMSE=9.976
ARIMA(4, 2, 1) RMSE=6.532
ARIMA(4, 2, 2) RMSE=4.023
ARIMA(6, 0, 0) RMSE=2.445
ARIMA(6, 0, 1) RMSE=2.570
ARIMA(6, 0, 2) RMSE=2.780
ARIMA(6, 1, 0) RMSE=6.773
ARIMA(6, 1, 1) RMSE=3.849
ARIMA(6, 1, 2) RMSE=3.848
ARIMA(6, 2, 0) RMSE=9.455
ARIMA(6, 2, 1) RMSE=5.287
ARIMA(8

This code was a engineered and modified from Jason Brownlee's blog.

https://machinelearningmastery.com/grid-search-arima-hyperparameters-with-python/

In [2]:
def ARIMA_predictions(original_series,stationary_series,parameters, days_out):
    # Here we predict the noise
    train = stationary_series[:-days_out]
    # test the model on the last N points of the data
    test = stationary_series[-days_out:]
    #
    history = [x for x in train]

    train.dropna(inplace=True)
    test.dropna(inplace=True)
    predicted_values = []
    tested = []
    for i in range(len(test)):
        model = ARIMA(history, order=parameters)
        model_fit = model.fit(disp=0)
        yhat = float(model_fit.forecast(steps=1)[0])
        predicted_values.append(yhat)
        tested_values = list(test)[i]
        tested.append(tested_values)
        history.append(tested_values)
    predictions_series = pd.Series(predicted_values, index = test.index)
    
    # This part couples the signal to the noise.
    a = original_series.loc[original_series.index[-(days_out+1):]]['Price']
    b = np.exp(predictions_series)
    full_predictions = pd.DataFrame(a*b,columns=['Predicted with ARIMA']).dropna()
    df = pd.concat([original_series.loc[original_series.index[-days_out:]],full_predictions],axis=1)
    error = str(np.sqrt(mean_squared_error(df['Price'],df['Predicted with ARIMA'])))
    print("ARIMA Root Mean Squared Error: ",error)
    return df,error

# BITCOIN PREDICTIONS WITH ARIMA

In [3]:
bitcoin_ARIMA,bit_error = ARIMA_predictions(bitcoin,bits_log_shift,(0,1,1),7)
bitcoin_ARIMA.to_csv('bitcoin_ARIMA_predictions_RMSE_'+bit_error+'.csv')
bitcoin_ARIMA

ARIMA Root Mean Squared Error:  3.898537928223503


Unnamed: 0_level_0,Price,Predicted with ARIMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-08-23,4318.350098,4321.91923
2017-08-24,4364.410156,4368.245276
2017-08-25,4352.299805,4356.228855
2017-08-26,4345.75,4349.690398
2017-08-27,4390.310059,4394.20315
2017-08-28,4597.310059,4601.035638
2017-08-29,4583.02002,4587.372385


# ETHEREUM PREDICTIONS WITH ARIMA

In [4]:
ethereum_ARIMA, eth_error = ARIMA_predictions(ethereum,eth_log_shift,(2,0,0),7)
ethereum_ARIMA.to_csv('ethereum_ARIMA_predictions_RMSE_'+eth_error+'.csv')
ethereum_ARIMA

ARIMA Root Mean Squared Error:  1.809057919412035


Unnamed: 0_level_0,Price,Predicted with ARIMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-08-23,325.28,327.386856
2017-08-24,330.06,331.704926
2017-08-25,332.86,334.689312
2017-08-26,347.88,350.004927
2017-08-27,347.66,348.96061
2017-08-28,372.35,374.689536
2017-08-29,383.86,384.718779
