In [45]:
import pandas as pd
import datetime
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras import backend as K

In [46]:
# Load the combined dataset: BTC price/volume, other market series, sentiment scores and blockchain metrics.
features = pd.read_csv("cooked_complete_dataset.csv")

In [47]:
# Drop every row that contains a missing value. Plain reassignment is used
# instead of inplace=True so the loaded frame is never mutated behind the scenes.
features = features.dropna()

In [48]:
# Preview the first five rows to sanity-check the loaded columns.
features.head()

Unnamed: 0,date,Adj_Close_BTC-USD,Open_BTC-USD,High_BTC-USD,Low_BTC-USD,Volume_BTC-USD,Adj_Close_SPY,Adj_Close_GLD,Adj_Close_CHFUSD=X,Adj_Close_CNYUSD=X,Adj_Close_EURUSD=X,Adj_Close_GBPUSD=X,Adj_Close_JPYUSD=X,coindesk_sentiment,num_of_coindesk_posts,reddit_comments_sentiments,top_50_reddit_posts_sentiments,blockchain_transactions_per_block,blockchain_hash_rates
0,2020-12-14,19246.64453,19144.49219,19305.09961,19012.70898,22473997681,361.926788,171.539993,1.125442,0.152772,1.21334,1.331824,0.009621,0.249489,12,0.188275,0.297238,2167.931034,134533587.6
1,2020-12-15,19417.07617,19246.91992,19525.00781,19079.8418,26741982541,366.819824,173.940002,1.12793,0.152679,1.21489,1.333084,0.009614,0.173773,18,0.144389,0.399427,2288.857143,133351912.2
2,2020-12-16,21310.59766,19418.81836,21458.9082,19298.31641,44409011479,367.395508,174.899994,1.129382,0.152945,1.21543,1.344447,0.009649,0.341491,11,0.137256,0.489673,2204.314685,132323572.3
3,2020-12-17,22805.16211,21308.35156,23642.66016,21234.67578,71378606374,369.449982,176.740005,1.129446,0.153109,1.219959,1.350293,0.009664,0.197572,10,0.156723,0.63603,2399.077519,132373208.7
4,2020-12-18,23137.96094,22806.79688,23238.60156,22399.8125,40387896275,367.974792,176.440002,1.130301,0.15309,1.226272,1.357018,0.009696,0.315601,2,0.166419,0.107093,2392.031847,131791042.0


In [49]:
def lag(data, dic):
    """Build a frame of `date`, the BTC adjusted close and lagged feature columns.

    Parameters
    ----------
    data : DataFrame holding at least "date", "Adj_Close_BTC-USD" and every key of `dic`.
    dic : mapping of column name -> number of lags; a value of n yields the column
        shifted down by 1..n rows, each named '<column>_lag<k>'.

    Returns
    -------
    DataFrame with "date", "Adj_Close_BTC-USD" and the lagged columns, in the
    iteration order of `dic`. Rows without enough history hold NaN in the lags.
    """
    lagged = [
        data[column].shift(step).rename('{}_lag{}'.format(data[column].name, step))
        for column, depth in dic.items()
        for step in range(1, depth + 1)
    ]
    base = [data["date"], data["Adj_Close_BTC-USD"]]
    return pd.concat(base + lagged, axis = 1)

## Without Sentiments

In [50]:
# Number of lagged copies to build per column (market and blockchain features only).
feature_lags = {"Adj_Close_BTC-USD" : 2, 
                "Volume_BTC-USD" : 1, 
                "Adj_Close_SPY" : 1,
                "Adj_Close_GLD" : 1,
                "Adj_Close_CHFUSD=X" : 1,
                "Adj_Close_CNYUSD=X" : 1,
                "Adj_Close_EURUSD=X" : 1,
                "Adj_Close_GBPUSD=X" : 1,
                "Adj_Close_JPYUSD=X" : 1,
                "blockchain_transactions_per_block" : 1,
                "blockchain_hash_rates" : 1}

data = lag(features, feature_lags)

# Dates are "YYYY-MM-DD" strings; parse them once with the matching explicit format
# (the previous second pass used a contradictory day-first format and the
# deprecated infer_datetime_format flag).
data["date"] = pd.to_datetime(data["date"], format = "%Y-%m-%d")

# Keep 2021 onwards; an ISO literal avoids any day/month ambiguity.
data = data[data["date"] >= "2021-01-01"]

# Chronological split: everything up to and including 2021-03-10 trains, the rest tests.
train = data[data["date"] <= "2021-03-10"]
test = data[data["date"] > "2021-03-10"]

x_train = train.drop(["date", "Adj_Close_BTC-USD"], axis = 1)
y_train = train["Adj_Close_BTC-USD"]

x_test = test.drop(["date", "Adj_Close_BTC-USD"], axis = 1)
y_test = test["Adj_Close_BTC-USD"]

# Fit the scaler on the training features only, then apply it to both splits.
sc = MinMaxScaler(feature_range = (0, 1))
sc.fit(x_train)
x_train_trans = sc.transform(x_train)
x_test_trans = sc.transform(x_test)

# Reshape to 3-D (samples, n_features, 1) for the LSTM: each lagged column
# becomes one step of the sequence.
x_train_trans = x_train_trans.reshape(x_train_trans.shape[0], x_train_trans.shape[1], 1)
x_test_trans = x_test_trans.reshape(x_test_trans.shape[0], x_test_trans.shape[1], 1)

In [51]:
def root_mean_squared_error(y_true, y_pred):
    """Keras loss: square root of the mean squared difference between `y_pred` and `y_true`."""
    squared_diff = K.square(y_pred - y_true)
    mean_of_squares = K.mean(squared_diff)
    return K.sqrt(mean_of_squares)

In [55]:
def _build_lstm_regressor(layer_spe, time_step):
    """Assemble and compile (but do not train) a stacked LSTM regressor.

    Each entry of `layer_spe` adds one LSTM layer with that many units (relu
    activation) followed by Dropout(0.3). Every LSTM except the last returns
    full sequences so the next LSTM receives 3-D input; the first one declares
    `input_shape=(time_step, 1)`. A single-unit Dense layer produces the output.
    """
    regressor = Sequential()
    last_index = len(layer_spe) - 1
    for index, unit in enumerate(layer_spe):
        layer_kwargs = {"activation": "relu"}
        if index < last_index:
            # An LSTM stacked on top needs the whole sequence, not just the final state.
            layer_kwargs["return_sequences"] = True
        if index == 0:
            layer_kwargs["input_shape"] = (time_step, 1)  # (time steps, data dimension)
        regressor.add(LSTM(unit, **layer_kwargs))
        regressor.add(Dropout(0.3))

    regressor.add(Dense(units = 1))
    regressor.compile(optimizer = 'Adam', loss = root_mean_squared_error)
    return regressor


def LSTM_GS(layer_spe, time_step):
    """Train one LSTM configuration and return its RMSE on the test split.

    Parameters
    ----------
    layer_spe : list of int, units per stacked LSTM layer.
    time_step : int, sequence length declared as the first layer's input shape.

    Returns
    -------
    Root mean squared error between the global `y_test` and the predictions
    for the global `x_test_trans`.

    Notes
    -----
    Reads the notebook globals `x_train_trans`, `y_train`, `x_test_trans` and
    `y_test`, so the result depends on whichever data-prep cell ran last.
    The score is computed on the test split, so using it to pick a
    configuration tunes on test data.
    NOTE(review): the data-prep cells reshape the inputs to
    (samples, n_features, 1), so the actual sequence length is the number of
    lagged columns rather than `time_step`; confirm the two are meant to agree.
    """
    regressor = LSTM_pipe(layer_spe, time_step)

    yhat = regressor.predict(x_test_trans)
    # Square root of the MSE; the `squared=False` flag is deprecated/removed in
    # recent scikit-learn releases, while this form works on every version.
    return mean_squared_error(y_test, yhat) ** 0.5


def LSTM_pipe(layer_spe, time_step):
    """Build the LSTM described by `layer_spe`, train it and return the fitted model.

    Trains for 200 epochs, silently, on the notebook globals `x_train_trans`
    and `y_train`. See `LSTM_GS` for the meaning of the parameters.
    """
    regressor = _build_lstm_regressor(layer_spe, time_step)
    regressor.fit(x_train_trans, y_train, epochs = 200, verbose = 0)
    return regressor

In [41]:
# Grid search over stacked-LSTM shapes: every deeper layer halves the unit
# count of the one before it. Each entry is ((layer units, time_step), RMSE).
res = []
time_steps = (1, 2, 3, 4, 5)
depth_plan = (
    (1, (4, 8, 16, 32)),   # 1 layer
    (2, (8, 16, 32)),      # 2 layers
    (3, (8, 16, 32)),      # 3 layers
)
for depth, first_layer_units in depth_plan:
    for unit in first_layer_units:
        for time_step in time_steps:
            layer_units = [unit // (2 ** level) for level in range(depth)]
            score = LSTM_GS(list(layer_units), time_step)
            res.append(((layer_units, time_step), score))





















In [43]:
# Show the ((layer units, time_step), RMSE) entry with the lowest RMSE returned by LSTM_GS.
sorted(res, key = lambda x : x[1])[0]

(([32, 16, 8], 5), 2065.815175886998)

In [44]:
# Refit the configuration that ranked best in the grid search above and keep the trained model.
LSTM_regression = LSTM_pipe([32, 16, 8], 5)



## With Sentiments

In [52]:
# List the available columns before choosing which ones to lag.
features.columns

Index(['date', 'Adj_Close_BTC-USD', 'Open_BTC-USD', 'High_BTC-USD',
       'Low_BTC-USD', 'Volume_BTC-USD', 'Adj_Close_SPY', 'Adj_Close_GLD',
       'Adj_Close_CHFUSD=X', 'Adj_Close_CNYUSD=X', 'Adj_Close_EURUSD=X',
       'Adj_Close_GBPUSD=X', 'Adj_Close_JPYUSD=X', 'coindesk_sentiment',
       'num_of_coindesk_posts', 'reddit_comments_sentiments',
       'top_50_reddit_posts_sentiments', 'blockchain_transactions_per_block',
       'blockchain_hash_rates'],
      dtype='object')

In [53]:
# Number of lagged copies to build per column; this variant adds the
# CoinDesk and Reddit sentiment columns to the market and blockchain features.
feature_lags = {"Adj_Close_BTC-USD" : 2, 
                "Volume_BTC-USD" : 1, 
                "Adj_Close_SPY" : 1,
                "Adj_Close_GLD" : 1,
                "Adj_Close_CHFUSD=X" : 1,
                "Adj_Close_CNYUSD=X" : 1,
                "Adj_Close_EURUSD=X" : 1,
                "Adj_Close_GBPUSD=X" : 1,
                "Adj_Close_JPYUSD=X" : 1,
                "blockchain_transactions_per_block" : 1,
                "blockchain_hash_rates" : 1,
                "coindesk_sentiment" : 1,
                "num_of_coindesk_posts" : 1,
                "reddit_comments_sentiments" : 1,
                "top_50_reddit_posts_sentiments" : 1}

data = lag(features, feature_lags)

# Dates are "YYYY-MM-DD" strings; parse them once with the matching explicit format
# (the previous second pass used a contradictory day-first format and the
# deprecated infer_datetime_format flag).
data["date"] = pd.to_datetime(data["date"], format = "%Y-%m-%d")

# Keep 2021 onwards; an ISO literal avoids any day/month ambiguity.
data = data[data["date"] >= "2021-01-01"]

# Chronological split: everything up to and including 2021-03-10 trains, the rest tests.
train = data[data["date"] <= "2021-03-10"]
test = data[data["date"] > "2021-03-10"]

x_train = train.drop(["date", "Adj_Close_BTC-USD"], axis = 1)
y_train = train["Adj_Close_BTC-USD"]

x_test = test.drop(["date", "Adj_Close_BTC-USD"], axis = 1)
y_test = test["Adj_Close_BTC-USD"]

# Fit the scaler on the training features only, then apply it to both splits.
sc = MinMaxScaler(feature_range = (0, 1))
sc.fit(x_train)
x_train_trans = sc.transform(x_train)
x_test_trans = sc.transform(x_test)

# Reshape to 3-D (samples, n_features, 1) for the LSTM: each lagged column
# becomes one step of the sequence.
x_train_trans = x_train_trans.reshape(x_train_trans.shape[0], x_train_trans.shape[1], 1)
x_test_trans = x_test_trans.reshape(x_test_trans.shape[0], x_test_trans.shape[1], 1)

In [56]:
# Same grid search as before, now on the sentiment-augmented inputs: every
# deeper layer halves the unit count of the one before it.
# Each entry is ((layer units, time_step), RMSE).
res2 = []
time_steps = (1, 2, 3, 4, 5)
depth_plan = (
    (1, (4, 8, 16, 32)),   # 1 layer
    (2, (8, 16, 32)),      # 2 layers
    (3, (8, 16, 32)),      # 3 layers
)
for depth, first_layer_units in depth_plan:
    for unit in first_layer_units:
        for time_step in time_steps:
            layer_units = [unit // (2 ** level) for level in range(depth)]
            score = LSTM_GS(list(layer_units), time_step)
            res2.append(((layer_units, time_step), score))





















In [57]:
# Show the ((layer units, time_step), RMSE) entry with the lowest RMSE returned by LSTM_GS.
sorted(res2, key = lambda x : x[1])[0]

(([16], 5), 2240.5092197602685)