In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw dataset from a CSV file; the path is relative to the current working directory.
dataset = pd.read_csv("../data/btc.csv")

# Feature Engineering


In [3]:
import copy

# `data` is only another name for the loaded frame; no copy is made on this line.
data = dataset
# Two independent deep copies, so columns added to either one never reach `dataset`.
tek_ind_1, tek_ind_2 = (copy.deepcopy(data) for _ in range(2))

In [4]:
# Fractional change of the close versus the previous row. The first row has no
# predecessor, so its NaN is replaced by 0.
tek_ind_1['daily_return'] = tek_ind_1.close.pct_change().fillna(0)
# Running product of (1 + daily return): the growth factor relative to the first row.
tek_ind_1['cum_daily_return'] = (1 + tek_ind_1['daily_return']).cumprod()

# High-minus-low range. Both operands are read from the working copy rather than from
# `dataset`, so the feature stays correct even if the two frames ever diverge.
tek_ind_1['H-L'] = tek_ind_1.high - tek_ind_1.low

# Close-minus-open change within the row.
tek_ind_1['C-O'] = tek_ind_1.close - tek_ind_1.open

# Moving averages over the preceding rows only: shift(1) excludes the current close.
# Rows without a full window are NaN and get replaced by 0.
tek_ind_1['10day Ma'] = tek_ind_1.close.shift(1).rolling(window = 10).mean().fillna(0)
tek_ind_1['50day Ma'] = tek_ind_1.close.shift(1).rolling(window = 50).mean().fillna(0)
tek_ind_1['200day Ma'] = tek_ind_1.close.shift(1).rolling(window = 200).mean().fillna(0)

import talib
# 14-period RSI of the close, computed by TA-Lib. No fillna is applied here.
tek_ind_1['rsi'] = talib.RSI(tek_ind_1.close.values, timeperiod = 14)

# 14-period Williams %R from high / low / close, computed by TA-Lib. No fillna is applied here.
tek_ind_1['williams %R'] = talib.WILLR(tek_ind_1.high.values,
                                       tek_ind_1.low.values,
                                       tek_ind_1.close.values,
                                       14)

# 7- and 21-row moving averages that include the current row (no shift);
# rows without a full window are replaced by 0.
tek_ind_1['ma7'] = tek_ind_1.close.rolling(window=7).mean().fillna(0)
tek_ind_1['ma21'] = tek_ind_1.close.rolling(window=21).mean().fillna(0)

# MACD line: 12-span exponential moving average minus the 26-span one.
tek_ind_1['ema_26'] = tek_ind_1.close.ewm(span=26).mean().fillna(0)
tek_ind_1['ema_12'] = tek_ind_1.close.ewm(span=12).mean().fillna(0)
tek_ind_1['macd'] = (tek_ind_1['ema_12'] - tek_ind_1['ema_26'])

In [5]:
# Bollinger Bands: rolling mean plus/minus a multiple of the rolling standard deviation.
window = 21       # look-back length of the rolling window, in rows
no_of_std = 2     # half-width of the band, in standard deviations

# One rolling view of the close feeds both statistics.
close_roll = tek_ind_1.close.rolling(window)
rolling_mean = close_roll.mean()
rolling_std = close_roll.std()

# Rows with fewer than `window` observations are NaN and get replaced by 0.
band_offset = rolling_std * no_of_std
tek_ind_1['bb_high'] = (rolling_mean + band_offset).fillna(0)
tek_ind_1['bb_low'] = (rolling_mean - band_offset).fillna(0)

# Exponentially weighted mean of the close with centre of mass 0.5.
tek_ind_1['ema'] = tek_ind_1.close.ewm(com=0.5).mean()

# NOTE(review): this column is the close minus the constant 1, not a difference against
# an earlier close, so it carries no information beyond `close` itself. Confirm the
# intended definition (e.g. close - close.shift(n)) before relying on it.
tek_ind_1['momentum'] = tek_ind_1.close - 1

In [6]:
# import matplotlib.pyplot as plt
# plt.figure(figsize=(15, 10))
# plt.plot(tek_ind_1['close'], label ='Actual')
# plt.plot(tek_ind_1['bb_high'], label ='BBHigh')
# plt.plot(tek_ind_1['bb_low'], label ='BBLow')
# plt.legend(loc='best')

In [7]:
# Columns that are removed before modelling.
_dropped_columns = ['date', 'volume', 'adjClose', 'adjHigh', 'adjLow', 'adjOpen',
                    'adjVolume', 'divCash', 'splitFactor']

# Work on a deep copy so `tek_ind_1` keeps its NaNs and all of its columns.
tek_ind_1_copy = copy.deepcopy(tek_ind_1)
# Replace every remaining NaN by 0, then drop the columns listed above.
tek_ind_1_copy = tek_ind_1_copy.fillna(0).drop(columns=_dropped_columns)

tek_ind_1_copy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3451 entries, 0 to 3450
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   close             3451 non-null   float64
 1   high              3451 non-null   float64
 2   low               3451 non-null   float64
 3   open              3451 non-null   float64
 4   daily_return      3451 non-null   float64
 5   cum_daily_return  3451 non-null   float64
 6   H-L               3451 non-null   float64
 7   C-O               3451 non-null   float64
 8   10day Ma          3451 non-null   float64
 9   50day Ma          3451 non-null   float64
 10  200day Ma         3451 non-null   float64
 11  rsi               3451 non-null   float64
 12  williams %R       3451 non-null   float64
 13  ma7               3451 non-null   float64
 14  ma21              3451 non-null   float64
 15  ema_26            3451 non-null   float64
 16  ema_12            3451 non-null   float64


In [8]:
# Convert the feature frame to a float32 NumPy array; the bare name on the last line
# makes the notebook display it.
values = tek_ind_1_copy.values.astype('float32')
values

array([[1.0900000e+01, 1.0900000e+01, 1.0900000e+01, ..., 0.0000000e+00,
        1.0900000e+01, 9.8999996e+00],
       [1.1690000e+01, 1.1850000e+01, 1.1150000e+01, ..., 0.0000000e+00,
        1.1492500e+01, 1.0690000e+01],
       [1.1700000e+01, 1.1700000e+01, 1.1700000e+01, ..., 0.0000000e+00,
        1.1636154e+01, 1.0700000e+01],
       ...,
       [4.5172973e+04, 4.6649246e+04, 4.3020789e+04, ..., 4.1751121e+04,
        4.5576902e+04, 4.5171973e+04],
       [4.9597684e+04, 4.9796078e+04, 4.4981742e+04, ..., 4.2134691e+04,
        4.8257422e+04, 4.9596684e+04],
       [4.8462754e+04, 5.0206160e+04, 4.7063383e+04, ..., 4.2345035e+04,
        4.8394309e+04, 4.8461754e+04]], dtype=float32)

In [9]:
# Report the global minimum and maximum taken over every column of the array.
for label, reducer in (("Min: ", np.min), ("Max: ", np.max)):
    print(label, reducer(values))
# Re-wrap the array as a DataFrame (default integer column labels); from here on
# `values` is a DataFrame, not an ndarray.
values = pd.DataFrame(values)

Min:  -5185.0405
Max:  60030.39


In [11]:
# Display the rows from position 70 onward.
values[70:]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
70,2.250000,3.000000,2.250000,2.450000,-0.050633,0.206422,0.750000,-0.200000,2.826000,3.877000,...,-100.000000,2.577143,3.036667,3.113666,2.721575,-0.392091,3.954803,2.118531,2.304783,1.250000
71,2.590000,2.590000,2.460000,2.530000,0.151111,0.237615,0.130000,0.060000,2.721000,3.824000,...,-69.090912,2.590000,2.971905,3.074723,2.701333,-0.373390,3.807623,2.136187,2.494928,1.590000
72,3.000000,3.000000,2.540000,2.540000,0.158301,0.275229,0.460000,0.460000,2.645000,3.762600,...,-31.818182,2.590000,2.948095,3.069168,2.747282,-0.321886,3.748361,2.147830,2.831643,2.000000
73,2.750000,15.000000,2.500000,2.500000,-0.083333,0.252294,12.500000,0.250000,2.610000,3.709400,...,-96.078430,2.555714,2.921905,3.045446,2.747700,-0.297746,3.709702,2.134107,2.777214,1.750000
74,2.770000,2.770000,2.770000,2.770000,0.007273,0.254128,0.000000,0.000000,2.638000,3.650000,...,-95.921570,2.594286,2.895238,3.024979,2.751131,-0.273848,3.662665,2.127811,2.772405,1.770000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3446,46294.660156,48436.054688,44101.605469,47081.562500,-0.016641,4247.216309,4334.447754,-786.906006,52178.921875,40734.160156,...,-84.615631,51331.933594,48664.531250,46607.777344,49568.195312,2960.416748,58266.695312,39062.367188,46891.851562,46293.660156
3447,46131.218750,48388.878906,45010.531250,46284.902344,-0.003530,4232.222168,3378.345947,-153.682755,51893.777344,40871.097656,...,-85.762161,49943.273438,48994.253906,46572.476562,49039.429688,2466.952148,57662.570312,40325.937500,46384.761719,46130.218750
3448,45172.972656,46649.246094,43020.789062,46127.402344,-0.020772,4144.309570,3628.455566,-954.430298,51293.832031,40981.621094,...,-85.966362,48193.121094,49297.121094,46468.808594,48444.589844,1975.779541,56843.125000,41751.121094,45576.902344,45171.972656
3449,49597.683594,49796.078125,44981.742188,45167.027344,0.097950,4550.246094,4814.334473,4430.654785,50654.812500,41082.343750,...,-57.114357,47551.691406,49487.710938,46700.578125,48621.988281,1921.410034,56840.734375,42134.691406,48257.421875,49596.683594


In [None]:
def create_time_series(data, look_back=60, index_of_column_to_predict=0):
    """Pair every row of ``data`` with the target value ``look_back`` rows later.

    Row ``i`` of the result holds all columns of ``data`` at position ``i`` plus, in a
    final column named ``"Pred"``, the value of the chosen column at position
    ``i + look_back``. The last ``look_back`` rows of ``data`` have no future value
    and are therefore not used as feature rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Source table. It is not modified.
    look_back : int, default 60
        Distance, in rows, between a feature row and its target. Must be >= 0.
    index_of_column_to_predict : int, default 0
        Positional index of the column whose future value becomes the target.

    Returns
    -------
    pandas.DataFrame
        ``max(len(data) - look_back, 0)`` rows, indexed 1..n under the name ``"id"``,
        containing the columns of ``data`` followed by ``"Pred"``.

    Raises
    ------
    ValueError
        If ``look_back`` is negative.
    """
    if look_back < 0:
        raise ValueError("look_back must be non-negative")

    # Number of rows that have both a feature row and a target `look_back` rows later.
    n_samples = max(len(data) - look_back, 0)
    sample_ids = pd.RangeIndex(1, n_samples + 1, name="id")

    # Feature rows: everything except the trailing rows that have no future target.
    # Slicing with an explicit stop (instead of a negative one) keeps look_back == 0 valid.
    lagged = data.iloc[:n_samples, :].copy()
    lagged.index = sample_ids

    # Targets: the chosen column, starting `look_back` rows in.
    target = data.iloc[look_back:, index_of_column_to_predict].rename("Pred").to_frame()
    target.index = sample_ids

    # Both frames share the same index, so this is a plain side-by-side join with no NaNs.
    return pd.concat([lagged, target], axis=1)

In [None]:
# NOTE(review): in the visible notebook `arr_df` is first assigned in the cell after this
# one, so on a fresh kernel (Restart & Run All) this line raises NameError. Move this
# display below the cell that builds `arr_df`.
arr_df

In [None]:
# Lag distance (in rows) between a feature row and its target; also reused further down
# as the window length of the model inputs.
lookBack = 60

# Lagged feature table with the future close appended as the last column.
arr_df = create_time_series(values, lookBack, tek_ind_1_copy.columns.get_loc("close"))
# Plain reassignment instead of inplace=True keeps this cell safe to re-run.
arr_df = arr_df.fillna(0)

# Column labels are derived from the table instead of being typed out by hand:
# v1..vN at lag `lookBack` for the feature columns, and 'v1(t)' for the target.
n_feature_cols = arr_df.shape[1] - 1
arr_df.columns = (
    ['v{}(t-{})'.format(k, lookBack) for k in range(1, n_feature_cols + 1)]
    + ['v1(t)']
)

print(arr_df.head())

In [None]:
# Descriptive statistics of the columns of `arr_df`.
arr_df.describe()

In [None]:
# Order-preserving split (no shuffling): the first 80% of the rows train, the rest test.
val = arr_df.values
# The split index is taken from the array that is actually being split. `arr_df` is
# shorter than `tek_ind_1_copy`, so using the latter's length would not give 80/20.
train_sample = int(len(val) * 0.8)

# Re-wrapping in DataFrame gives both parts default integer column labels.
train = pd.DataFrame(val[:train_sample, :])
test = pd.DataFrame(val[train_sample:, :])

print(train.shape, test.shape)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-column min/max from the training rows only, then scale those same rows.
scaler = MinMaxScaler()
scaler.fit(train)
X = scaler.transform(train)
print(X.shape)

In [None]:
# Sliding windows over the scaled training rows: each sample is the `lookBack` rows that
# precede position `end`, and its label is column 0 of the row at `end`.
# The window length comes from `lookBack` rather than a literal 60, so it matches the
# test-window construction.
# NOTE(review): every window contains all columns of X, including the last one
# ('v1(t)'), while the label is column 0 — confirm that this is the intended target and
# that no future value leaks into the inputs.
window_ends = range(lookBack, X.shape[0])
X_train = np.array([X[end - lookBack:end] for end in window_ends])
y_train = np.array([X[end, 0] for end in window_ends])

# Shapes only; dumping the full window arrays floods the notebook output.
print(X_train.shape, y_train.shape)

In [None]:
# input_dim = 22
# hidden_dim = 75
# num_layers = 3
# output_dim = 1
# num_epochs = 100

In [None]:
# import torch
# import torch.nn as nn
# x_train = torch.from_numpy(X_train).type(torch.Tensor)
# x_test = torch.from_numpy(X_test).type(torch.Tensor)
# y_train_lstm = torch.from_numpy(y_train).type(torch.Tensor)
# y_test_lstm = torch.from_numpy(y_test).type(torch.Tensor)
# y_train_gru = torch.from_numpy(y_train).type(torch.Tensor)
# y_test_gru = torch.from_numpy(y_test).type(torch.Tensor)

In [None]:
# Prefix the test rows with the last `lookBack` training rows so that the first test
# window has a full history.
look_back = train.tail(lookBack)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE: this rebinds `data`, which earlier in the notebook was an alias of the raw dataset.
data = pd.concat([look_back, test])
print (data)

# Scale with the statistics learnt from the training rows (transform only, no re-fit).
inputs = scaler.transform(data)

In [None]:
# Sliding windows over the scaled test inputs: each sample is the `lookBack` rows that
# precede position `end`, and its label is column 0 of the row at `end`.
window_ends = range(lookBack, inputs.shape[0])
X_test = np.array([inputs[end - lookBack:end] for end in window_ends])
y_test = np.array([inputs[end, 0] for end in window_ends])

# Shapes only; dumping the full window arrays floods the notebook output.
print(X_test.shape, y_test.shape)

# TensorFlow

## LSTM

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Three stacked LSTM layers followed by a single-unit regression head.
model_lstm = tf.keras.Sequential()
model_lstm.add(tf.keras.layers.LSTM(units=75, return_sequences =True,input_shape=(X_train.shape[1], X_train.shape[2])))
model_lstm.add(tf.keras.layers.LSTM(units=30, return_sequences =True))
# The last recurrent layer returns only its final step, so the Dense head emits one
# value per window. This matches y_train (one scalar per window) and mirrors the GRU
# model defined further down.
model_lstm.add(tf.keras.layers.LSTM(units=30))

model_lstm.add(tf.keras.layers.Dense(units=1))
model_lstm.compile(loss='mae', optimizer='adam')
model_lstm.summary()

# shuffle=False keeps the windows in their original order during training.
history_lstm = model_lstm.fit(X_train, y_train, epochs =35, batch_size=32, validation_data =(X_test, y_test), shuffle=False)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch for the Keras LSTM.
fig, ax = plt.subplots(figsize=(10, 6))
for key, label, colour in (('loss', 'train_loss', 'red'),
                           ('val_loss', 'test_loss', 'blue')):
    ax.plot(history_lstm.history[key], label=label, color=colour)
ax.set_xlabel('epochs')
ax.set_ylabel('loss')
ax.legend(loc='best')

In [None]:
# y_pred = model_lstm.predict(X_test)
# scaler.scale_
# normal_scale = 1/5.21225901e-05
# y_pred = y_pred * normal_scale
# y_test = y_test * normal_scale

# mean_y_test = y_test.mean()
# mean_y_pred = y_pred.mean()

# print(mean_y_test, mean_y_pred)
# accuracy = round((mean_y_test/mean_y_pred )*100, 2)
# accuracy

## GRU


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# (time steps, features) of a single training window.
window_shape = (X_train.shape[1], X_train.shape[2])

# Three stacked GRU layers; only the last one collapses the sequence to its final step,
# so the Dense head produces one value per window.
model_gru = tf.keras.Sequential([
    tf.keras.layers.GRU(units=75, return_sequences=True, input_shape=window_shape),
    tf.keras.layers.GRU(units=30, return_sequences=True),
    tf.keras.layers.GRU(units=30),
    tf.keras.layers.Dense(units=1),
])

model_gru.compile(loss='mae', optimizer='adam')
model_gru.summary()

# shuffle=False keeps the windows in their original order during training.
history_gru = model_gru.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=64,
    validation_data=(X_test, y_test),
    shuffle=False,
)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch for the Keras GRU.
fig, ax = plt.subplots(figsize=(10, 6))
for key, label, colour in (('loss', 'train_loss', 'red'),
                           ('val_loss', 'test_loss', 'blue')):
    ax.plot(history_gru.history[key], label=label, color=colour)
ax.set_xlabel('epochs')
ax.set_ylabel('loss')
ax.legend(loc='best')

In [None]:
# Run the Keras GRU on the test windows. Note that `y_pred` is rebound later in the
# notebook by the PyTorch model's predictions.
y_pred = model_gru.predict(X_test)

In [None]:
# scaler.scale_
# normal_scale = 1/5.21225901e-05

# y_pred = y_pred * normal_scale
# y_test = y_test * normal_scale

# mean_y_test = y_test.mean()
# mean_y_pred = y_pred.mean()

# print(mean_y_test, mean_y_pred)
# accuracy = round((mean_y_test/mean_y_pred )*100, 2)
# accuracy

In [None]:
# Display the shape of the training windows.
X_train.shape

# PyTorch

In [None]:
# Hyper-parameters of the PyTorch GRU.
# The feature count is read from the windows themselves, so it cannot drift out of sync
# when a feature column is added or removed.
input_dim = X_train.shape[2]
hidden_dim = 32     # size of the GRU hidden state
num_layers = 3      # number of stacked GRU layers
output_dim = 1      # one regression output per window
num_epochs = 100    # number of full-batch training iterations

In [None]:
import torch
import torch.nn as nn


def _as_default_tensor(array):
    """Wrap a NumPy array as a tensor converted to the ``torch.Tensor`` type."""
    return torch.from_numpy(array).type(torch.Tensor)


# Window tensors for training and testing.
x_train, x_test = _as_default_tensor(X_train), _as_default_tensor(X_test)
# The *_lstm and *_gru targets hold the same values but are separate tensor objects.
y_train_lstm, y_test_lstm = _as_default_tensor(y_train), _as_default_tensor(y_test)
y_train_gru, y_test_gru = _as_default_tensor(y_train), _as_default_tensor(y_test)

In [None]:
class GRU(nn.Module):
    """Stacked GRU followed by a linear head applied to the last time step.

    Parameters
    ----------
    input_dim : int
        Number of features per time step.
    hidden_dim : int
        Size of the GRU hidden state.
    num_layers : int
        Number of stacked GRU layers.
    output_dim : int
        Number of values produced per input sequence.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # batch_first=True: inputs are laid out as (batch, time, feature).
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map ``x`` of shape (batch, time, input_dim) to (batch, output_dim)."""
        # With no initial state given, nn.GRU starts from zeros created on the same
        # device and dtype as its input. An explicit torch.zeros(...) would always be a
        # default-dtype CPU tensor and fail once the model or input lives elsewhere.
        out, _ = self.gru(x)
        # Only the output at the final time step feeds the linear head.
        return self.fc(out[:, -1, :])
    

In [None]:
# GRU regressor, mean-squared-error objective, Adam optimiser with learning rate 0.01.
model = GRU(input_dim, hidden_dim, num_layers, output_dim)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [None]:
import time

# Full-batch training: every epoch runs a single forward/backward pass over all windows.
hist = np.zeros(num_epochs)   # loss recorded at each epoch
start_time = time.time()
lstm = []
for t in range(num_epochs):
    y_train_pred = model(x_train)
    # The model emits shape (N, 1) while the targets are (N,). Without aligning them,
    # MSELoss broadcasts the pair to (N, N) and averages over every prediction/target
    # combination. (`y_train_lstm` holds the same values as `y_train_gru`.)
    loss = criterion(y_train_pred, y_train_lstm.reshape(y_train_pred.shape))
    print("Epoch ", t, "Mean Squared Error: ", loss.item())
    hist[t] = loss.item()
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()

training_time = time.time()-start_time

In [None]:
# Inference only: no_grad avoids building an autograd graph for the test windows.
# The resulting tensor still supports .detach().numpy().
with torch.no_grad():
    y_pred = model(x_test)

In [None]:

# Display the test-window tensor.
x_test

In [None]:
import matplotlib.pyplot as plt

# PyTorch GRU predictions against the test labels.
fig, ax = plt.subplots(figsize=(15, 10))
predicted = y_pred.detach().numpy()
ax.plot(predicted, label='pred')
ax.plot(y_test, label='actual')
ax.legend(loc='best')