In [1]:
#importing packages 
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import numpy as np

In [2]:
# Scrape Bitcoin's daily historical-data table from CoinMarketCap.
# The query string asks for 2013-04-28 up to today's date.
bitcoin_url = "https://coinmarketcap.com/currencies/bitcoin/historical-data/?start=20130428&end="+time.strftime("%Y%m%d")
bitcoin_market_info = pd.read_html(bitcoin_url)[0]
# parse the Date strings into proper datetimes
bitcoin_market_info['Date'] = pd.to_datetime(bitcoin_market_info['Date'])
# a '-' placeholder in Volume is replaced by 0 ...
missing_volume = bitcoin_market_info['Volume'] == "-"
bitcoin_market_info.loc[missing_volume, 'Volume'] = 0
# ... so that the whole column can be cast to integers
bitcoin_market_info['Volume'] = bitcoin_market_info['Volume'].astype('int64')
# preview the first few rows
bitcoin_market_info.head()

Unnamed: 0,Date,Open*,High,Low,Close**,Volume,Market Cap
0,2018-08-30,7043.76,7072.69,6834.69,6978.23,4463250000,121426092460
1,2018-08-29,7091.71,7113.3,6970.82,7047.16,4145880000,122240446825
2,2018-08-28,6891.08,7109.56,6882.34,7096.28,4659940000,118768797345
3,2018-08-27,6710.8,6884.64,6689.71,6884.64,4019000000,115646248487
4,2018-08-26,6754.64,6774.75,6620.75,6707.26,3295500000,116387236698


In [3]:
# Scrape Ethereum's daily historical-data table from CoinMarketCap.
# The query string asks for 2013-04-28 up to today's date.
eth_url = "https://coinmarketcap.com/currencies/ethereum/historical-data/?start=20130428&end="+time.strftime("%Y%m%d")
eth_market_info = pd.read_html(eth_url)[0]
# parse the Date strings into proper datetimes
eth_market_info['Date'] = pd.to_datetime(eth_market_info['Date'])
# preview the first few rows
eth_market_info.head()

Unnamed: 0,Date,Open*,High,Low,Close**,Volume,Market Cap
0,2018-08-30,289.75,291.24,275.21,284.11,1513100000,29449252259
1,2018-08-29,296.16,297.07,287.29,289.31,1474460000,30094775053
2,2018-08-28,286.65,297.4,283.57,296.5,1513350000,29122157621
3,2018-08-27,275.35,285.6,273.65,285.6,1406790000,27968567433
4,2018-08-26,279.52,279.52,272.44,275.2,1206650000,28386921151


In [4]:
# Tag every column except the first one with its coin's prefix, so the two
# tables can be merged without column-name clashes.
# NOTE: this renames in place, so re-running the cell prefixes the names again.
for frame, prefix in ((bitcoin_market_info, 'bt_'), (eth_market_info, 'eth_')):
    frame.columns = [frame.columns[0]] + [prefix + name for name in frame.columns[1:]]

In [5]:
# Join the two coins' tables on Date (pandas' default inner join), giving one
# row per day that appears in both tables.
market_info = bitcoin_market_info.merge(eth_market_info, on=['Date'])
# keep only the rows dated 2017-01-01 or later
from_2017 = market_info['Date'] >= '2017-01-01'
market_info = market_info[from_2017]
market_info.head(10)

Unnamed: 0,Date,bt_Open*,bt_High,bt_Low,bt_Close**,bt_Volume,bt_Market Cap,eth_Open*,eth_High,eth_Low,eth_Close**,eth_Volume,eth_Market Cap
0,2018-08-30,7043.76,7072.69,6834.69,6978.23,4463250000,121426092460,289.75,291.24,275.21,284.11,1513100000,29449252259
1,2018-08-29,7091.71,7113.3,6970.82,7047.16,4145880000,122240446825,296.16,297.07,287.29,289.31,1474460000,30094775053
2,2018-08-28,6891.08,7109.56,6882.34,7096.28,4659940000,118768797345,286.65,297.4,283.57,296.5,1513350000,29122157621
3,2018-08-27,6710.8,6884.64,6689.71,6884.64,4019000000,115646248487,275.35,285.6,273.65,285.6,1406790000,27968567433
4,2018-08-26,6754.64,6774.75,6620.75,6707.26,3295500000,116387236698,279.52,279.52,272.44,275.2,1206650000,28386921151
5,2018-08-25,6719.95,6789.63,6700.96,6763.19,3312600000,115778181501,283.28,283.51,278.47,279.65,1208360000,28762556561
6,2018-08-24,6551.52,6719.96,6498.64,6719.96,4097820000,112864138781,278.11,283.3,273.86,282.97,1450170000,28231866915
7,2018-08-23,6371.34,6546.54,6371.34,6534.88,3426180000,109748424726,271.75,279.55,271.09,277.1,1271160000,27580425754
8,2018-08-22,6486.25,6816.79,6310.11,6376.71,4668110000,111714827567,281.97,297.49,265.0,271.34,1507660000,28611999411
9,2018-08-21,6301.07,6500.87,6298.24,6488.76,3377180000,108513649180,273.33,285.97,273.33,281.94,1164120000,27729942831


In [7]:
# Derive two extra features for each coin:
#   <coin>close_off_high : 2*(High - Close)/(High - Low) - 1, i.e. -1 when the
#                          close equals the day's high and +1 when it equals the low
#   <coin>volatility     : (High - Low)/Open
for coins in ['bt_', 'eth_']:
    day_high = market_info[coins+'High']
    day_low = market_info[coins+'Low']
    day_range = day_high - day_low
    kwargs = {coins+'close_off_high': 2*(day_high - market_info[coins+'Close**'])/day_range - 1,
              coins+'volatility': day_range/market_info[coins+'Open*']}
    market_info = market_info.assign(**kwargs)

# keep Date plus the four model features of each coin
feature_cols = [coin+metric for coin in ['bt_', 'eth_']
                for metric in ['Close**','Volume','close_off_high','volatility']]
# sort ascending by Date so that subsequent rows represent later timepoints
model_data = market_info[['Date'] + feature_cols].sort_values(by='Date')
model_data.head()

Unnamed: 0,Date,bt_Close**,bt_Volume,bt_close_off_high,bt_volatility,eth_Close**,eth_Volume,eth_close_off_high,eth_volatility
606,2017-01-01,998.33,147775008,-0.78594,0.046054,8.17,14731700,0.22449,0.061404
605,2017-01-02,1021.75,222184992,-0.44422,0.034738,8.38,14579600,-0.692308,0.047736
604,2017-01-03,1043.84,185168000,-0.978648,0.022005,9.73,33625200,-0.678571,0.200717
603,2017-01-04,1154.73,344945984,-0.918449,0.11013,11.25,41051200,-0.965116,0.177137
602,2017-01-05,1013.38,510199008,0.266353,0.24265,10.25,41557400,0.317269,0.220549


In [8]:
# Chronological split: rows dated before split_date form the training set,
# rows on or after it form the test set.
split_date = '2018-03-01'
training_set = model_data[model_data['Date'] < split_date]
test_set = model_data[model_data['Date'] >= split_date]
# Date is not fed to the model. The column is named via the `columns` keyword
# because passing the axis positionally (drop('Date', 1)) was deprecated in
# pandas 1.3 and is rejected by pandas 2.x.
training_set = training_set.drop(columns='Date')
test_set = test_set.drop(columns='Date')

In [9]:
# number of consecutive rows fed to the model as one input sample
window_len = 10
# columns rescaled relative to the first row of each window
norm_cols = [coin+metric for coin in ['bt_', 'eth_'] for metric in ['Close**','Volume']]


def _normalised_windows(data, window_len, norm_cols):
    """Cut ``data`` into overlapping windows of ``window_len`` rows.

    One window is produced for every start position in
    ``range(len(data) - window_len)``. Inside each window the columns listed
    in ``norm_cols`` are replaced by their fractional change from the window's
    first row (``value / first_value - 1``); other columns are left as they are.

    Returns a list of DataFrames (empty if ``data`` has no more than
    ``window_len`` rows).
    """
    windows = []
    for start in range(len(data) - window_len):
        window = data.iloc[start:start + window_len].copy()
        for col in norm_cols:
            # Replace the whole column instead of writing through .loc[:, col]:
            # the Volume columns are integers, and assigning floats into an
            # integer column in place is a deprecated upcast in recent pandas.
            window[col] = window[col] / window[col].iloc[0] - 1
        windows.append(window)
    return windows


# Target for each window: the eth close on the row right after the window,
# expressed as a fractional change from the close on the window's first row.
LSTM_training_inputs = _normalised_windows(training_set, window_len, norm_cols)
eth_train_close = training_set['eth_Close**'].values
LSTM_training_outputs = eth_train_close[window_len:]/eth_train_close[:-window_len] - 1

LSTM_test_inputs = _normalised_windows(test_set, window_len, norm_cols)
eth_test_close = test_set['eth_Close**'].values
LSTM_test_outputs = eth_test_close[window_len:]/eth_test_close[:-window_len] - 1

# show the first training window
LSTM_training_inputs[0]

Unnamed: 0,bt_Close**,bt_Volume,bt_close_off_high,bt_volatility,eth_Close**,eth_Volume,eth_close_off_high,eth_volatility
606,0.0,0.0,-0.78594,0.046054,0.0,0.0,0.22449,0.061404
605,0.023459,0.503536,-0.44422,0.034738,0.025704,-0.010325,-0.692308,0.047736
604,0.045586,0.25304,-0.978648,0.022005,0.190942,1.282506,-0.678571,0.200717
603,0.156662,1.334265,-0.918449,0.11013,0.376989,1.786589,-0.965116,0.177137
602,0.015075,2.452539,0.266353,0.24265,0.25459,1.820951,0.317269,0.220549
601,-0.096291,1.38116,0.775772,0.160583,0.25459,1.00057,-0.24,0.097182
600,-0.08989,0.891727,-1.0,0.094113,0.208078,0.571686,0.188406,0.067383
599,-0.087276,0.074031,0.13647,0.061079,0.259486,0.132021,-0.642857,0.056738
598,-0.09566,-0.039912,-0.358914,0.037099,0.264382,0.74578,0.40625,0.062076
597,-0.090802,-0.216322,0.041274,0.015303,0.29131,-0.261477,-0.3,0.038573


In [10]:
# Turn every window into a NumPy array and stack the windows into a single
# array (3-D when all windows share one shape, which is what build_model's
# input_shape relies on).
LSTM_training_inputs = np.array([np.array(window) for window in LSTM_training_inputs])

# The loop variable is deliberately named differently from the list it walks.
LSTM_test_inputs = np.array([np.array(window) for window in LSTM_test_inputs])

from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout

def build_model(inputs, output_size, neurons, activ_func="linear",
                dropout=0.25, loss="mae", optimizer="adam"):
    """Build and compile an LSTM -> Dropout -> Dense -> Activation network.

    Parameters
    ----------
    inputs : array with at least three dimensions
        Only its shape is used: ``inputs.shape[1]`` and ``inputs.shape[2]``
        become the LSTM layer's ``input_shape``.
    output_size : int
        Number of units in the final Dense layer.
    neurons : int
        Number of units in the LSTM layer.
    activ_func : str
        Activation applied after the Dense layer.
    dropout : float
        Rate passed to the Dropout layer that follows the LSTM.
    loss, optimizer : str
        Forwarded unchanged to ``model.compile``.

    Returns
    -------
    The compiled (untrained) Sequential model.
    """
    window_shape = (inputs.shape[1], inputs.shape[2])
    layers = [
        LSTM(neurons, input_shape=window_shape),
        Dropout(dropout),
        Dense(units=output_size),
        Activation(activ_func),
    ]
    model = Sequential(layers)
    model.compile(loss=loss, optimizer=optimizer)
    return model

  from ._conv import register_converters as _register_converters
Using Theano backend.


In [11]:
# seed NumPy's global RNG before building the model
np.random.seed(202)
# single-output model: one predicted value per input window
eth_model = build_model(LSTM_training_inputs, output_size=1, neurons = 20)
# Targets are recomputed here so the cell does not depend on whichever cell
# last assigned LSTM_training_outputs: the eth close on the row after each
# window, as a fractional change from the close on the window's first row.
eth_close = training_set['eth_Close**'].values
LSTM_training_outputs = eth_close[window_len:]/eth_close[:-window_len] - 1
# eth_history is the object returned by fit()
eth_history = eth_model.fit(LSTM_training_inputs, LSTM_training_outputs,
                            epochs=50, batch_size=1, verbose=2, shuffle=True)

Epoch 1/50
 - 1s - loss: 0.1566
Epoch 2/50
 - 1s - loss: 0.1068
Epoch 3/50
 - 1s - loss: 0.0992
Epoch 4/50
 - 1s - loss: 0.0884
Epoch 5/50
 - 1s - loss: 0.0846
Epoch 6/50
 - 1s - loss: 0.0835
Epoch 7/50
 - 1s - loss: 0.0758
Epoch 8/50
 - 1s - loss: 0.0811
Epoch 9/50
 - 1s - loss: 0.0770
Epoch 10/50
 - 1s - loss: 0.0761
Epoch 11/50
 - 1s - loss: 0.0747
Epoch 12/50
 - 1s - loss: 0.0747
Epoch 13/50
 - 1s - loss: 0.0703
Epoch 14/50
 - 1s - loss: 0.0762
Epoch 15/50
 - 1s - loss: 0.0675
Epoch 16/50
 - 1s - loss: 0.0687
Epoch 17/50
 - 1s - loss: 0.0693
Epoch 18/50
 - 1s - loss: 0.0687
Epoch 19/50
 - 1s - loss: 0.0696
Epoch 20/50
 - 1s - loss: 0.0663
Epoch 21/50
 - 1s - loss: 0.0707
Epoch 22/50
 - 1s - loss: 0.0703
Epoch 23/50
 - 1s - loss: 0.0676
Epoch 24/50
 - 1s - loss: 0.0690
Epoch 25/50
 - 1s - loss: 0.0689
Epoch 26/50
 - 1s - loss: 0.0676
Epoch 27/50
 - 1s - loss: 0.0666
Epoch 28/50
 - 1s - loss: 0.0692
Epoch 29/50
 - 1s - loss: 0.0674
Epoch 30/50
 - 1s - loss: 0.0640
Epoch 31/50
 - 1s -

In [12]:
# random seed for reproducibility
np.random.seed(202)
# number of future closing prices predicted per window;
# change this value to make a longer/shorter prediction
pred_range = 5
# initialise model architecture: one output unit per predicted closing price
eth_model = build_model(LSTM_training_inputs, output_size=pred_range, neurons = 20)
# Targets: the pred_range eth closes that follow each window, each expressed
# as a fractional change from the close on the window's first row.
eth_close = training_set['eth_Close**'].values
LSTM_training_outputs = np.array([eth_close[i:i+pred_range]/eth_close[i-window_len] - 1
                                  for i in range(window_len, len(eth_close)-pred_range)])
# Train the model; the last pred_range windows are left out because no target
# row was built for them above.
# note: eth_history contains information on the training error per epoch
eth_history = eth_model.fit(LSTM_training_inputs[:-pred_range], LSTM_training_outputs,
                            epochs=50, batch_size=1, verbose=2, shuffle=True)

Epoch 1/50
 - 1s - loss: 0.2145
Epoch 2/50
 - 1s - loss: 0.1534
Epoch 3/50
 - 1s - loss: 0.1389
Epoch 4/50
 - 1s - loss: 0.1324
Epoch 5/50
 - 1s - loss: 0.1295
Epoch 6/50
 - 1s - loss: 0.1236
Epoch 7/50
 - 1s - loss: 0.1242
Epoch 8/50
 - 1s - loss: 0.1197
Epoch 9/50
 - 1s - loss: 0.1185
Epoch 10/50
 - 1s - loss: 0.1195
Epoch 11/50
 - 1s - loss: 0.1161
Epoch 12/50
 - 1s - loss: 0.1153
Epoch 13/50
 - 1s - loss: 0.1143
Epoch 14/50
 - 1s - loss: 0.1153
Epoch 15/50
 - 1s - loss: 0.1148
Epoch 16/50
 - 1s - loss: 0.1115
Epoch 17/50
 - 1s - loss: 0.1134
Epoch 18/50
 - 1s - loss: 0.1112
Epoch 19/50
 - 1s - loss: 0.1099
Epoch 20/50
 - 1s - loss: 0.1105
Epoch 21/50
 - 1s - loss: 0.1088
Epoch 22/50
 - 1s - loss: 0.1083
Epoch 23/50
 - 1s - loss: 0.1070
Epoch 24/50
 - 1s - loss: 0.1082
Epoch 25/50
 - 1s - loss: 0.1065
Epoch 26/50
 - 1s - loss: 0.1075
Epoch 27/50
 - 1s - loss: 0.1084
Epoch 28/50
 - 1s - loss: 0.1062
Epoch 29/50
 - 1s - loss: 0.1068
Epoch 30/50
 - 1s - loss: 0.1087
Epoch 31/50
 - 1s -

In [13]:
# random seed for reproducibility
np.random.seed(202)
# number of future closing prices predicted per window;
# change this value to make a longer/shorter prediction
pred_range = 5
# initialise model architecture: one output unit per predicted closing price
bt_model = build_model(LSTM_training_inputs, output_size=pred_range, neurons = 20)
# Targets: the pred_range bitcoin closes that follow each window, each
# expressed as a fractional change from the close on the window's first row.
bt_close = training_set['bt_Close**'].values
LSTM_training_outputs = np.array([bt_close[i:i+pred_range]/bt_close[i-window_len] - 1
                                  for i in range(window_len, len(bt_close)-pred_range)])
# Train the model; the last pred_range windows are left out because no target
# row was built for them above.
# note: bt_history contains information on the training error per epoch
bt_history = bt_model.fit(LSTM_training_inputs[:-pred_range], LSTM_training_outputs,
                            epochs=50, batch_size=1, verbose=2, shuffle=True)

Epoch 1/50
 - 1s - loss: 0.1615
Epoch 2/50
 - 1s - loss: 0.1150
Epoch 3/50
 - 1s - loss: 0.1007
Epoch 4/50
 - 1s - loss: 0.0937
Epoch 5/50
 - 1s - loss: 0.0881
Epoch 6/50
 - 1s - loss: 0.0873
Epoch 7/50
 - 1s - loss: 0.0836
Epoch 8/50
 - 1s - loss: 0.0812
Epoch 9/50
 - 1s - loss: 0.0802
Epoch 10/50
 - 1s - loss: 0.0808
Epoch 11/50
 - 1s - loss: 0.0767
Epoch 12/50
 - 1s - loss: 0.0769
Epoch 13/50
 - 1s - loss: 0.0756
Epoch 14/50
 - 1s - loss: 0.0763
Epoch 15/50
 - 1s - loss: 0.0746
Epoch 16/50
 - 1s - loss: 0.0742
Epoch 17/50
 - 1s - loss: 0.0752
Epoch 18/50
 - 1s - loss: 0.0740
Epoch 19/50
 - 1s - loss: 0.0740
Epoch 20/50
 - 1s - loss: 0.0718
Epoch 21/50
 - 1s - loss: 0.0730
Epoch 22/50
 - 1s - loss: 0.0725
Epoch 23/50
 - 1s - loss: 0.0725
Epoch 24/50
 - 1s - loss: 0.0722
Epoch 25/50
 - 1s - loss: 0.0719
Epoch 26/50
 - 1s - loss: 0.0706
Epoch 27/50
 - 1s - loss: 0.0699
Epoch 28/50
 - 1s - loss: 0.0694
Epoch 29/50
 - 1s - loss: 0.0694
Epoch 30/50
 - 1s - loss: 0.0700
Epoch 31/50
 - 2s -