In [2]:
import pandas as pd
import numpy as np
import requests
import datetime
import time

In [None]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [3]:
# Build the historical-data URL: fixed start date, end date = today (YYYYMMDD).
history_url = (
    "https://coinmarketcap.com/currencies/bitcoin/historical-data/?start=20150101&end="
    + time.strftime("%Y%m%d")
)
# read_html returns a list of the tables found on the page; keep the first one.
bdata = pd.read_html(history_url)[0]

In [4]:
def _close_off_high(frame):
    """Position of Close inside the High-Low range: -1 when Close == High, +1 when Close == Low."""
    day_range = frame['High'] - frame['Low']
    return 2 * (frame['High'] - frame['Close']) / day_range - 1


def _volatility(frame):
    """High-Low range expressed as a fraction of the Open value."""
    return (frame['High'] - frame['Low']) / frame['Open']


# Parse the Date column into datetimes and store Volume as 64-bit integers.
bdata = bdata.assign(
    Date=pd.to_datetime(bdata['Date']),
    Volume=bdata['Volume'].astype('int64'),
)

# Derived columns; assign() calls each function with the frame itself.
kwargs = {
    'Close Off High': _close_off_high,
    'Volatility': _volatility,
}
bdata = bdata.assign(**kwargs)

# Keep only the columns used downstream, ordered by ascending date.
relevant_cols = ['Date', 'Close', 'Volume', 'Close Off High', 'Volatility']
mdata = bdata[relevant_cols].sort_values(by='Date')

In [5]:
# Preview the first five rows of the prepared frame.
mdata.head(n=5)

Unnamed: 0,Date,Close,Volume,Close Off High,Volatility
1212,2015-01-01,314.25,8036550,0.92236,0.020097
1211,2015-01-02,315.03,7860650,-0.289474,0.007259
1210,2015-01-03,281.08,33054400,1.0,0.10821
1209,2015-01-04,264.19,55629100,0.555706,0.105353
1208,2015-01-05,274.47,43962800,-0.41629,0.050023


In [6]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as mse
from sklearn.linear_model import LinearRegression

In [7]:
def train_test_rmse(df, feature_cols, testsize, target_col='Close', random_state=80):
    """Fit a linear regression on a random train/test split and report the test RMSE.

    Parameters
    ----------
    df : DataFrame
        Holds the feature columns and the target column.
    feature_cols : str or list of str
        Column name(s) used as predictors. A single name is accepted and
        treated as a one-element list.
    testsize : float or int
        Forwarded to ``train_test_split`` as ``test_size``.
    target_col : str, optional
        Column to predict. Defaults to ``'Close'``.
    random_state : int, optional
        Seed forwarded to ``train_test_split``. Defaults to 80.

    Returns
    -------
    float
        Root-mean-squared error between the actual and predicted target
        values on the test split. The same value is printed, and a scatter
        plot of actual vs. predicted values is drawn with pyplot.
    """
    # A bare column name would select a 1-D Series; wrap it so X stays 2-D.
    if isinstance(feature_cols, str):
        feature_cols = [feature_cols]

    X = df[feature_cols]
    y = df[target_col]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=random_state, test_size=testsize)

    linreg = LinearRegression()
    linreg.fit(X_train, y_train)

    y_pred = linreg.predict(X_test)

    plt.scatter(y_test, y_pred)
    plt.xlabel('Actual Prices')
    plt.ylabel('Predicted Prices')

    rmse = np.sqrt(mse(y_test, y_pred))
    print('RMSE: %f' % rmse)
    # Hand the score back so callers can compare feature sets programmatically.
    return rmse

In [8]:
def _make_windows(frame, window_len, norm_cols):
    """Slice `frame` into overlapping windows of `window_len` consecutive rows.

    Inside each window every column listed in `norm_cols` is rescaled to its
    change relative to the window's first row (value / first_value - 1); the
    remaining columns are left as they are.

    Returns one array stacking a 2-D (rows x columns) array per window, with
    one window for each start position in ``range(len(frame) - window_len)``.
    """
    windows = []
    for start in range(len(frame) - window_len):
        window = frame.iloc[start:start + window_len].copy()
        for col in norm_cols:
            # Replace the whole column instead of writing through .loc so an
            # integer column can become float without a dtype clash.
            window[col] = window[col] / window[col].iloc[0] - 1
        windows.append(np.array(window))
    return np.array(windows)


def _window_targets(frame, window_len, col='Close'):
    """Return `col` at row i + window_len relative to row i, minus 1, for every valid i."""
    values = frame[col].values
    return values[window_len:] / values[:-window_len] - 1


# Rows before split_date are used for training, the rest for testing.
split_date = '2017-07-01'
# axis is passed by keyword: the positional form drop('Date', 1) is rejected
# by recent pandas releases.
training_set = mdata[mdata['Date'] < split_date].drop('Date', axis=1)
test_set = mdata[mdata['Date'] >= split_date].drop('Date', axis=1)

window_len = 10
norm_cols = ['Close', 'Volume']

LSTM_training_inputs = _make_windows(training_set, window_len, norm_cols)
LSTM_training_outputs = _window_targets(training_set, window_len)

LSTM_test_inputs = _make_windows(test_set, window_len, norm_cols)
LSTM_test_outputs = _window_targets(test_set, window_len)

In [9]:
def build_model_safe(inputs, output_size, neurons, activ_func="linear",
                dropout=0.25, loss="mae", optimizer="adam"):
    """Assemble and compile an LSTM -> Dropout -> Dense -> Activation model.

    inputs      -- object with a ``shape`` of at least three entries; entries
                   1 and 2 are passed to the LSTM layer as ``input_shape``
    output_size -- number of units of the Dense layer
    neurons     -- first argument of the LSTM layer
    activ_func  -- activation applied after the Dense layer (default "linear")
    dropout     -- argument of the Dropout layer
    loss, optimizer -- forwarded to ``model.compile``

    Returns the compiled Sequential model.
    """
    model = Sequential()

    per_sample_shape = (inputs.shape[1], inputs.shape[2])
    stack = (
        LSTM(neurons, input_shape=per_sample_shape),
        Dropout(dropout),
        Dense(units=output_size),
        Activation(activ_func),
    )
    for layer in stack:
        model.add(layer)

    model.compile(loss=loss, optimizer=optimizer)
    return model

In [10]:
def build_model(inputs, output_size, neurons, activ_func="relu",
                dropout=0.25, loss="mae", optimizer="adam"):
    """Create a compiled Sequential network: LSTM, Dropout, Dense, Activation.

    The LSTM layer receives ``neurons`` and an ``input_shape`` taken from
    ``inputs.shape[1]`` and ``inputs.shape[2]``; the Dense layer has
    ``output_size`` units; ``activ_func`` (default "relu") is applied last.
    ``loss`` and ``optimizer`` are handed to ``compile``.

    NOTE(review): with the default "relu" activation the network output
    cannot be negative, whereas the training targets built in this notebook
    (price ratio - 1) can be. Confirm this is intended; build_model_safe
    is the same network with a "linear" default.
    """
    net = Sequential()

    recurrent = LSTM(neurons, input_shape=(inputs.shape[1], inputs.shape[2]))
    net.add(recurrent)

    regulariser = Dropout(dropout)
    net.add(regulariser)

    readout = Dense(units=output_size)
    net.add(readout)

    squash = Activation(activ_func)
    net.add(squash)

    net.compile(loss=loss, optimizer=optimizer)
    return net

In [None]:
# Seed NumPy's global random generator before building the network.
# NOTE(review): whether this alone makes training reproducible depends on the
# Keras backend's own random state; confirm.
np.random.seed(69)

btc_model = build_model(LSTM_training_inputs, output_size=1, neurons=20)

# Training options, kept together so they are easy to tweak.
fit_options = {
    'epochs': 80,
    'batch_size': 128,
    'verbose': 1,
    'shuffle': True,
}
btc_history = btc_model.fit(LSTM_training_inputs, LSTM_training_outputs, **fit_options)

In [None]:
# Mean absolute error on the training windows, computed on price ratios:
# (prediction + 1) versus Close[i + window_len] / Close[i].
train_close = training_set['Close'].values
predicted_ratio = np.transpose(btc_model.predict(LSTM_training_inputs)) + 1
actual_ratio = train_close[window_len:] / train_close[:-window_len]
'MAE: %.4f' % np.mean(np.abs(predicted_ratio - actual_ratio))