In [1]:
import datetime as dt
import numpy as np
import pandas as pd
from plotly import tools
import plotly.offline as py
py.init_notebook_mode (connected = True)
import plotly.graph_objs as go
## Append the sys path for xgboost
from keras.models import Sequential
from keras.layers import Dense, LSTM
import sys
sys.path.append (r"C:\Users\martinwg\xgboost\python-package")
import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
# Load the raw market data; the strings 'NA' and '?' are parsed as missing values.
df = pd.read_csv("crypto-markets.csv", na_values=['NA', '?'])

# Parse the date column and add two averaged-price columns per row.
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
df['hlc_average'] = (df['high'] + df['low'] + df['close']).div(3)
df['ohlc_average'] = (df['open'] + df['high'] + df['low'] + df['close']).div(4)

# Work on an independent copy of the Bitcoin rows so `df` is never mutated through it.
bitcoin = df.loc[df['name'].eq('Bitcoin')].copy()
# Target = the close of the following row (assumes rows are in ascending date order -- TODO confirm).
bitcoin['target'] = bitcoin['close'].shift(-1)

Using TensorFlow backend.


In [8]:
# Rows dated strictly before the cut-off form the training set; the rest is held out.
# The cut-off is a pandas Timestamp rather than datetime.date: recent pandas versions
# refuse to compare a datetime64 column with a plain date object (TypeError).
# NOTE(review): the positional slices below assume `bitcoin` is sorted by ascending date.
cutIdx = len(bitcoin[bitcoin['date'] < pd.Timestamp("2018-02-27")])

bit_train = bitcoin[:cutIdx] # sets for ARIMA routine
bit_holdout = bitcoin[cutIdx:]
print("Training set has {} observations.".format(len(bit_train)))
print("Test set has {} observations.".format(len(bit_holdout)))

Training set has 1766 observations.
Test set has 100 observations.


In [17]:
## Packages and Models
from statsmodels.tsa.arima_model import ARIMA
from matplotlib import pyplot
import itertools
import warnings
import statsmodels.api as sm
from sklearn import metrics
from matplotlib import pyplot
from keras.callbacks import EarlyStopping
from sklearn.model_selection import TimeSeriesSplit
from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler

In [10]:
def nforecast(nPred, low_train, low_test, modType, modArgs, showResults = False):
    """Produce `nPred` walk-forward one-step forecasts.

    At every step a model is refitted on the accumulated history, the first
    element of its forecast is stored, and the matching observation from
    `low_test` is appended to the history before the next step.

    Parameters
    ----------
    nPred : int
        Number of forecasting steps to run.
    low_train : iterable
        Initial history the first model is fitted on.
    low_test : object exposing `.values`
        Observations revealed one per step; `low_test.values[t]` is read at step `t`.
    modType : str
        Model family. Only 'arima' is implemented.
    modArgs : tuple
        Passed to ARIMA as its `order` argument.
    showResults : bool, optional
        When True, print the prediction and the observation at each step.

    Returns
    -------
    list
        One forecast entry per step, in order.

    Raises
    ------
    ValueError
        If `modType` is not 'arima'. Previously this surfaced as an unbound
        `model` variable inside the loop.
    """
    # Validate first so an unsupported model type fails fast with a clear message.
    if modType != 'arima':
        raise ValueError(
            "Unsupported modType {!r}; only 'arima' is implemented.".format(modType)
        )

    # Local import keeps this cell runnable on its own, independent of cell order.
    from statsmodels.tsa.arima_model import ARIMA

    history = list(low_train)
    predictions = []

    for t in range(nPred):
        model_fit = ARIMA(history, order = modArgs).fit(disp = 0)
        yhat = model_fit.forecast()[0]
        predictions.append(yhat)
        # Reveal the true observation so the next fit includes it.
        obs = low_test.values[t]
        history.append(obs)
        if showResults:
            print('predicted=%f, expected=%f' % (yhat, obs))

    return predictions

In [11]:
def to_sequences(seq_size, obs):
    """Slice `obs` into fixed-length windows and the value that follows each one.

    Parameters
    ----------
    seq_size : int
        Number of consecutive observations in each window.
    obs : sequence
        Ordered observations supporting `len`, slicing and integer indexing.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        `x` holds the windows with every observation wrapped in its own
        one-element list (a trailing feature axis of size 1); `y` holds the
        observation immediately after each window.
    """
    x = []
    y = []

    # Use the `seq_size` argument; the previous code read an undefined global
    # SEQUENCE_SIZE and silently ignored this parameter.
    # NOTE(review): the extra "- 1" leaves the last possible window unused; it is
    # kept so the number of produced samples does not change.
    for i in range(len(obs) - seq_size - 1):
        window = obs[i:(i + seq_size)]
        after_window = obs[i + seq_size]
        x.append([[value] for value in window])
        y.append(after_window)

    return np.array(x), np.array(y)

In [16]:
def create_lookback(dataset, look_back=1):
    """Build supervised (X, Y) pairs from column 0 of a 2-D array.

    For every start position, X receives the `look_back` consecutive values of
    column 0 beginning there, and Y receives the value of column 0 that
    immediately follows that window.

    Parameters
    ----------
    dataset : 2-D array supporting `dataset[rows, 0]` indexing
    look_back : int, optional
        Window length (default 1).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked windows and their following values.
    """
    n_samples = len(dataset) - look_back
    starts = range(n_samples)
    windows = [dataset[start:start + look_back, 0] for start in starts]
    targets = [dataset[start + look_back, 0] for start in starts]
    return np.array(windows), np.array(targets)

In [12]:
# Alias the training split under the shorter name `train` (same object, no copy).
train = bit_train

In [22]:
def _select_arima_order(series, p_values=range(0, 6), d_values=range(0, 2), q_values=range(0, 6)):
    """Grid-search (p, d, q) with SARIMAX and return the order with the lowest AIC."""
    # Generate all different combinations of p, d and q triplets
    pdq = list(itertools.product(p_values, d_values, q_values))
    print(pdq)

    best_order, best_aic = None, None
    for param in pdq:
        try:
            aic = sm.tsa.statespace.SARIMAX(series, order=param).fit().aic
        except Exception:
            # Orders that cannot be fitted are skipped. Catching Exception (not a
            # bare except) keeps KeyboardInterrupt able to stop a long search.
            continue
        print('ARIMA{} - AIC:{}'.format(param, aic))
        # Strict "<" keeps the first order seen when several share the lowest AIC.
        if best_order is None or aic < best_aic:
            best_order, best_aic = param, aic

    if best_order is None:
        raise ValueError("SARIMAX could not be fitted for any candidate (p, d, q) order.")
    return best_order


def _fit_residual_lstm(high_train, high_test):
    """Scale the two residual lists, build lag-1 samples and fit a stacked LSTM on them."""
    training_set = np.reshape(high_train, (len(high_train), 1))
    test_set = np.reshape(high_test, (len(high_test), 1))

    # The scaler is fitted on the training part only and then applied to the test part.
    scaler = MinMaxScaler()
    training_set = scaler.fit_transform(training_set)
    test_set = scaler.transform(test_set)

    # create datasets which are suitable for time series forecasting
    X_train, Y_train = create_lookback(training_set, 1)
    X_test, Y_test = create_lookback(test_set, 1)

    # reshape to (samples, 1, features), the input layout given to the LSTM layer below
    X_train = np.reshape(X_train, (len(X_train), 1, X_train.shape[1]))
    X_test = np.reshape(X_test, (len(X_test), 1, X_test.shape[1]))

    model = Sequential()
    model.add(LSTM(256, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(256))
    model.add(Dense(1))

    # compile and fit the model
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train, Y_train, epochs=100, batch_size=16, shuffle=False,
              validation_data=(X_test, Y_test),
              callbacks = [EarlyStopping(monitor='val_loss', min_delta=5e-5, patience=10, verbose=1)])
    return model


## Main Function
def ARIMA_LSTM(train, holdout):
    """Fit ARIMA on the smoothed close series and an LSTM on the remainder.

    The 'close' column of `train` is split into an exponentially weighted mean
    (alpha = 0.6) and the remainder (close minus that mean). An ARIMA order is
    chosen by AIC on the first 90% of the smoothed series, ARIMA is then refitted
    on the whole smoothed series, and its one-step forecast is returned. The
    remainder is split 90/10 and used to train an LSTM.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain a 'close' column.
    holdout : object
        Accepted for interface compatibility; currently not used.

    Returns
    -------
    The first element of the fitted ARIMA model's `forecast()` result.

    Notes
    -----
    The LSTM is trained but its output is not combined with the ARIMA forecast.
    Warnings are suppressed only for the duration of this call; the global
    warning filters are restored afterwards.
    """
    training_idx = int(train.shape[0]*0.9)

    close = pd.Series(train['close'])
    low_VC = close.ewm(alpha = 0.6).mean()
    high_VC = close - low_VC

    low_train = low_VC[:training_idx]

    # format (y - y_ses) as lists for the LSTM routine
    high_train = high_VC[:training_idx].tolist()
    high_test = high_VC[training_idx:].tolist()

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore") # specify to ignore warning messages

        ### ARIMA part on low
        # Retrieve pdq settings for lowest aic
        params = _select_arima_order(low_train)
        print(params)

        model_fit = ARIMA(low_VC, order = params).fit(disp = 0)
        yforecast_arima = model_fit.forecast()[0]

        ### LSTM part on high
        _fit_residual_lstm(high_train, high_test)

    return yforecast_arima

In [23]:
# Run the pipeline on the training / hold-out split; the cell displays the value it returns.
ARIMA_LSTM (bit_train, bit_holdout)

[(0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), (0, 0, 4), (0, 0, 5), (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), (0, 1, 4), (0, 1, 5), (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3), (1, 0, 4), (1, 0, 5), (1, 1, 0), (1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4), (1, 1, 5), (2, 0, 0), (2, 0, 1), (2, 0, 2), (2, 0, 3), (2, 0, 4), (2, 0, 5), (2, 1, 0), (2, 1, 1), (2, 1, 2), (2, 1, 3), (2, 1, 4), (2, 1, 5), (3, 0, 0), (3, 0, 1), (3, 0, 2), (3, 0, 3), (3, 0, 4), (3, 0, 5), (3, 1, 0), (3, 1, 1), (3, 1, 2), (3, 1, 3), (3, 1, 4), (3, 1, 5), (4, 0, 0), (4, 0, 1), (4, 0, 2), (4, 0, 3), (4, 0, 4), (4, 0, 5), (4, 1, 0), (4, 1, 1), (4, 1, 2), (4, 1, 3), (4, 1, 4), (4, 1, 5), (5, 0, 0), (5, 0, 1), (5, 0, 2), (5, 0, 3), (5, 0, 4), (5, 0, 5), (5, 1, 0), (5, 1, 1), (5, 1, 2), (5, 1, 3), (5, 1, 4), (5, 1, 5)]
ARIMA(0, 0, 0) - AIC:26338.134219722662
ARIMA(0, 1, 0) - AIC:15148.74656572134
ARIMA(0, 1, 1) - AIC:14888.898675911074
ARIMA(0, 1, 2) - AIC:14853.023297955222
ARIMA(0, 1, 3) - AIC:14831.33094154232
ARIMA(0, 

array([10244.69415185])