In [3]:
import pandas as pd
import time
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import numpy as np

In [4]:
# Scrape Bitcoin's historical price table from coinmarketcap, 2013-04-28 through today
# (the restriction to 2016 onwards is applied later, after merging with Ethereum)
bitcoin_market_info = pd.read_html(
    "https://coinmarketcap.com/currencies/bitcoin/historical-data/?start=20130428&end="
    + time.strftime("%Y%m%d"),
    flavor='html5lib',
)[0]
# parse the Date strings into proper datetimes
bitcoin_market_info['Date'] = pd.to_datetime(bitcoin_market_info['Date'])
# Volume entries equal to '-' become 0, then the whole column is cast to int64
bitcoin_market_info['Volume'] = bitcoin_market_info['Volume'].replace("-", 0).astype('int64')
# preview the first few rows
bitcoin_market_info.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,2018-03-23,8736.25,8879.62,8360.62,8879.62,5954120000,147941000000
1,2018-03-22,8939.44,9100.71,8564.9,8728.47,5530390000,151366000000
2,2018-03-21,8937.48,9177.37,8846.33,8929.28,6043130000,151316000000
3,2018-03-20,8619.67,9051.02,8389.89,8913.47,6361790000,145922000000
4,2018-03-19,8344.12,8675.87,8182.4,8630.65,6729110000,141240000000


In [5]:
# Scrape Ethereum's historical price table from coinmarketcap, 2013-04-28 through today
# (the restriction to 2016 onwards is applied later, after merging with Bitcoin)
eth_market_info = pd.read_html(
    "https://coinmarketcap.com/currencies/ethereum/historical-data/?start=20130428&end="
    + time.strftime("%Y%m%d"),
    flavor='html5lib',
)[0]
# parse the Date strings into proper datetimes
eth_market_info['Date'] = pd.to_datetime(eth_market_info['Date'])
# preview the first few rows
eth_market_info.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,2018-03-23,539.86,540.49,512.13,539.62,1596350000,53098100000
1,2018-03-22,562.1,577.57,523.09,539.7,1523460000,55273800000
2,2018-03-21,559.1,589.61,550.57,561.73,1781270000,54967300000
3,2018-03-20,556.72,567.09,521.2,557.17,1833680000,54722100000
4,2018-03-19,546.63,558.1,519.12,556.73,2046790000,53718600000


In [6]:
# Tag every column except the first one (the shared Date key used by the merge)
# with its coin prefix, so the two frames can be joined without name collisions.
# Columns that already carry the prefix are left untouched, which makes this cell
# safe to re-run: it no longer produces names such as 'bt_bt_Open'.
bitcoin_market_info.columns = [
    col if pos == 0 or col.startswith('bt_') else 'bt_' + col
    for pos, col in enumerate(bitcoin_market_info.columns)
]
eth_market_info.columns = [
    col if pos == 0 or col.startswith('eth_') else 'eth_' + col
    for pos, col in enumerate(eth_market_info.columns)
]

In [7]:

# Join the two coins on Date and keep only rows dated 2016-01-01 or later
market_info = (
    bitcoin_market_info
    .merge(eth_market_info, on=['Date'])
    .loc[lambda frame: frame['Date'] >= '2016-01-01']
)
# Relative open-to-close change per coin: (Close - Open) / Open
market_info = market_info.assign(**{
    prefix + 'day_diff': (market_info[prefix + 'Close'] - market_info[prefix + 'Open'])
                         / market_info[prefix + 'Open']
    for prefix in ('bt_', 'eth_')
})
market_info.head()

Unnamed: 0,Date,bt_Open,bt_High,bt_Low,bt_Close,bt_Volume,bt_Market Cap,eth_Open,eth_High,eth_Low,eth_Close,eth_Volume,eth_Market Cap,bt_day_diff,eth_day_diff
0,2018-03-23,8736.25,8879.62,8360.62,8879.62,5954120000,147941000000,539.86,540.49,512.13,539.62,1596350000,53098100000,0.016411,-0.000445
1,2018-03-22,8939.44,9100.71,8564.9,8728.47,5530390000,151366000000,562.1,577.57,523.09,539.7,1523460000,55273800000,-0.0236,-0.039851
2,2018-03-21,8937.48,9177.37,8846.33,8929.28,6043130000,151316000000,559.1,589.61,550.57,561.73,1781270000,54967300000,-0.000917,0.004704
3,2018-03-20,8619.67,9051.02,8389.89,8913.47,6361790000,145922000000,556.72,567.09,521.2,557.17,1833680000,54722100000,0.034085,0.000808
4,2018-03-19,8344.12,8675.87,8182.4,8630.65,6729110000,141240000000,546.63,558.1,519.12,556.73,2046790000,53718600000,0.034339,0.018477


In [57]:
# Per-coin features derived from the High / Low / Open / Close columns
for prefix in ('bt_', 'eth_'):
    day_high = market_info[prefix + 'High']
    day_low = market_info[prefix + 'Low']
    market_info = market_info.assign(**{
        # position of Close inside the High-Low range: -1 when Close == High, +1 when Close == Low
        prefix + 'close_off_high': 2 * (day_high - market_info[prefix + 'Close']) / (day_high - day_low) - 1,
        # High-Low range expressed as a fraction of Open
        prefix + 'volatility': (day_high - day_low) / market_info[prefix + 'Open'],
    })

In [58]:
# Columns kept for modelling: Date plus four metrics for each coin
feature_cols = ['Date']
for coin in ('bt_', 'eth_'):
    feature_cols.extend(coin + metric for metric in ('Close', 'Volume', 'close_off_high', 'volatility'))
# order rows by ascending Date so that subsequent rows represent later timepoints
model_data = market_info[feature_cols].sort_values(by='Date')
model_data.head()

Unnamed: 0,Date,bt_Close,bt_Volume,bt_close_off_high,bt_volatility,eth_Close,eth_Volume,eth_close_off_high,eth_volatility
812,2016-01-01,434.33,36278900,-0.560641,0.020292,0.948024,206062,-0.418477,0.02504
811,2016-01-02,433.44,30096600,0.250597,0.009641,0.937124,255504,0.965898,0.034913
810,2016-01-03,430.01,39633800,-0.173865,0.020827,0.971905,407632,-0.317885,0.060792
809,2016-01-04,433.09,38477500,-0.474265,0.012649,0.95448,346245,-0.057657,0.047943
808,2016-01-05,431.96,34522600,-0.013333,0.010391,0.950176,219833,0.69793,0.025236


In [59]:
#Training / Test Set Split

In [60]:
# Chronological split: rows dated before split_date form the training set,
# rows dated on or after it form the test set.
split_date = '2017-06-01'
training_set, test_set = model_data[model_data["Date"]<split_date], model_data[model_data['Date']>= split_date]
# Date is only needed to perform the split, so it is dropped from both sets.
# The `columns=` keyword is used because the positional-axis form drop('Date', 1)
# was deprecated in pandas 1.3 and removed in pandas 2.0.
training_set = training_set.drop(columns='Date')
test_set = test_set.drop(columns='Date')

In [61]:
# preview the first five training rows (Date has been dropped at this point)
training_set.head(n=5)

Unnamed: 0,bt_Close,bt_Volume,bt_close_off_high,bt_volatility,eth_Close,eth_Volume,eth_close_off_high,eth_volatility
812,434.33,36278900,-0.560641,0.020292,0.948024,206062,-0.418477,0.02504
811,433.44,30096600,0.250597,0.009641,0.937124,255504,0.965898,0.034913
810,430.01,39633800,-0.173865,0.020827,0.971905,407632,-0.317885,0.060792
809,433.09,38477500,-0.474265,0.012649,0.95448,346245,-0.057657,0.047943
808,431.96,34522600,-0.013333,0.010391,0.950176,219833,0.69793,0.025236


In [62]:
# Sliding-window configuration for the time-series inputs
window_len = 10  # number of consecutive rows in each window
# price and volume columns of both coins; these are rescaled relative to the
# first row of every window when the windows are built
norm_cols = ['bt_Close', 'bt_Volume', 'eth_Close', 'eth_Volume']



Unnamed: 0,bt_Close,bt_Volume,bt_close_off_high,bt_volatility,eth_Close,eth_Volume,eth_close_off_high,eth_volatility
812,434.33,36278900,-0.560641,0.020292,0.948024,206062,-4.184773e-01,0.025040
811,433.44,30096600,0.250597,0.009641,0.937124,255504,9.658978e-01,0.034913
810,430.01,39633800,-0.173865,0.020827,0.971905,407632,-3.178846e-01,0.060792
809,433.09,38477500,-0.474265,0.012649,0.954480,346245,-5.765723e-02,0.047943
808,431.96,34522600,-0.013333,0.010391,0.950176,219833,6.979297e-01,0.025236
807,429.11,34042500,-0.003623,0.012782,0.950860,308791,-2.145405e-01,0.026263
806,458.05,87562200,-0.951499,0.069045,0.942005,647462,6.816436e-01,0.040587
805,453.23,56993000,0.294196,0.032762,0.986789,545600,-8.067166e-01,0.055274
804,447.61,32278000,0.814194,0.017094,0.986833,226281,-4.118965e-01,0.019021
803,447.99,35995900,-0.919598,0.017758,0.999231,390888,-9.382354e-01,0.025266


In [80]:
# Create normalized windows
# Each input is a block of `window_len` consecutive training rows, restricted to
# norm_cols and rescaled so that every column is expressed as the relative change
# from the window's first row (the first row therefore becomes all zeros).

LSTM_training_inputs = []
# placeholder only: the targets are computed in the next cell
LSTM_training_outputs = []
# Iterate over the WHOLE training set. The earlier bound, len(training_set.iloc[0:100, :]),
# capped the inputs at 90 windows while the targets span the full training set,
# leaving inputs and outputs with different lengths.
for i in range(len(training_set) - window_len):
    # explicit positional slice of rows i .. i + window_len - 1
    temp_window = training_set.iloc[i:i + window_len]
    temp_set_in = temp_window[norm_cols] / temp_window[norm_cols].iloc[0] - 1
    # 0/0 (e.g. a zero value in the first row) yields NaN; treat it as "no change"
    LSTM_training_inputs.append(temp_set_in.fillna(0.0))

In [81]:
# Target for each window: relative change of every norm_cols column between a row
# and the row window_len positions after it
norm_values = training_set[norm_cols].values
LSTM_training_outputs = norm_values[window_len:] / norm_values[:-window_len] - 1

In [78]:
# Sanity check: print the first normalized input window, then the first target row
for first_sample in (LSTM_training_inputs[0], LSTM_training_outputs[0]):
    print(first_sample)

     bt_Close  bt_Volume  eth_Close  eth_Volume
812  0.000000   0.000000   0.000000    0.000000
811 -0.002049  -0.170410  -0.011498    0.239937
810 -0.009946   0.092475   0.025190    0.978201
809 -0.002855   0.060603   0.006810    0.680295
808 -0.005457  -0.048411   0.002270    0.066829
807 -0.012019  -0.061645   0.002991    0.498534
806  0.054613   1.413585  -0.006349    2.142074
805  0.043515   0.570968   0.040890    1.647747
804  0.030576  -0.110282   0.040937    0.098121
803  0.031451  -0.007801   0.054014    0.896944
[ 0.03246379  0.11497317  0.11811515  3.91075502]


[  4.48430000e+02   4.04500000e+07   1.06000000e+00   1.01192000e+06]
[  4.34330000e+02   3.62789000e+07   9.48024000e-01   2.06062000e+05]
[ 1.03246379  1.11497317  1.11811515  4.91075502]


812    434.33
811    433.44
810    430.01
809    433.09
808    431.96
807    429.11
806    458.05
805    453.23
804    447.61
803    447.99
Name: bt_Close, dtype: float64