In [4]:
import pandas as pd
import numpy as np
import copy
import matplotlib.pyplot as plt


In [5]:
dataset = pd.read_csv("../data/ada.csv")

In [6]:
dataset

Unnamed: 0,date,close,high,low,open,volume,adjClose,adjHigh,adjLow,adjOpen,adjVolume,divCash,splitFactor
0,2017-12-29,0.517300,1.080000,0.400004,1.000000,4.262042e+06,0.517300,1.080000,0.400004,1.000000,4.262042e+06,0.0,1.0
1,2017-12-30,0.590030,0.694139,0.420000,0.520000,2.496892e+07,0.590030,0.694139,0.420000,0.520000,2.496892e+07,0.0,1.0
2,2017-12-31,0.710000,0.740000,0.590020,0.590030,1.324840e+07,0.710000,0.740000,0.590020,0.590030,1.324840e+07,0.0,1.0
3,2018-01-01,0.702160,0.715000,0.632000,0.703400,1.380754e+07,0.702160,0.715000,0.632000,0.703400,1.380754e+07,0.0,1.0
4,2018-01-02,0.762000,0.800000,0.675020,0.704000,8.440669e+06,0.762000,0.800000,0.675020,0.704000,8.440669e+06,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1180,2021-04-27,1.306639,1.333633,1.223582,1.237973,4.791092e+08,1.306639,1.333633,1.223582,1.237973,4.791092e+08,0.0,1.0
1181,2021-04-28,1.337979,1.349051,1.233085,1.306617,4.717582e+08,1.337979,1.349051,1.233085,1.306617,4.717582e+08,0.0,1.0
1182,2021-04-29,1.306542,1.411739,1.266448,1.337613,6.828781e+08,1.306542,1.411739,1.266448,1.337613,6.828781e+08,0.0,1.0
1183,2021-04-30,1.353140,1.370994,1.285897,1.306601,3.870279e+08,1.353140,1.370994,1.285897,1.306601,3.870279e+08,0.0,1.0


In [7]:
data = copy.deepcopy(dataset)
data = data.drop(columns=['date', 'volume', 'adjClose', 'adjHigh','adjLow','adjOpen', 'adjVolume', 'divCash', 'splitFactor'], axis=1)


In [9]:
if data.isnull().sum().sum() >0:
    data = data.fillna(method='pad') # filling null values with their previous
    print ('filled empty values')
else:
    print ('No empty values')

No empty values


# Feature Engineering


In [10]:
tek_ind_1 = copy.deepcopy(data)
tek_ind_2 = copy.deepcopy(data)

In [11]:
    
def create_technical_indicators_set_1(data):
    data = _get_daily_return(data)
    data = _get_price_gap(data)
    data = _get_moving_average(data)
    data = _get_rsi(data)
    data = _get_R_precentage(data)
    data = _get_bollinger_bands(data)
    data = _get_exponential_ma(data)
    data = _get_momentum(data)
    return data


def _get_daily_return(data):
    # Percentage change between the current and a prior element
    data['daily_return'] = data.close.pct_change().fillna(0)
    
    # Cummulative Product (+1 is used not so we can ignore the 0s in the first couple rows)
    data['cum_daily_return'] = (1 + data['daily_return']).cumprod() 
    return data

def _get_price_gap(data):
    data['H-L'] = data.high - data.low
    data['C-O'] = data.close - data.open
    return data

def _get_moving_average(data):
    data['10day Ma'] = data.close.shift(1).rolling(window = 10).mean().fillna(0)
    data['50day Ma'] = data.close.shift(1).rolling(window = 50).mean().fillna(0)
    data['200day Ma'] = data.close.shift(1).rolling(window = 200).mean().fillna(0)
    
    data['ma7'] = data.close.rolling(window=7).mean().fillna(0)
    data['ma21'] = data.close.rolling(window=21).mean().fillna(0)
    
    # creating MA convergeance and divergence
    data['ema_26'] = data.close.ewm(span=26).mean().fillna(0)
    data['ema_12'] = data.close.ewm(span=12).mean().fillna(0)
    data['macd'] = (data['ema_12'] - data['ema_26'])
    
    return data
    

def _get_rsi(data):
    import talib
    data['rsi'] = talib.RSI(data.close.values, timeperiod = 14)
    return data

def _get_R_precentage(data):
    import talib
    data['R%'] = talib.WILLR(data.high.values, data.low.values, data.close.values,14)
    return data

def _get_bollinger_bands(data, window = 21,no_of_std =2):
    #calculate rolling mean and standard deviation using number of days set above
    rolling_mean = data.close.rolling(window).mean()
    rolling_std = data.close.rolling(window).std()
    #Create upper and lower Bollinger bands
    data['bb_high'] =(rolling_mean + (rolling_std * no_of_std)).fillna(0)
    data['bb_low'] =(rolling_mean - (rolling_std * no_of_std)).fillna(0)
    return data

def _get_exponential_ma(data):
    data['ema'] = data.close.ewm(com=0.5).mean()
    return data
def _get_momentum(data):
    data['momentum'] = data.close - 1
    return data

In [12]:
tek_ind_1 = create_technical_indicators_set_1(tek_ind_1)

In [13]:
# plt.figure(figsize=(15, 10))
# plt.plot(tek_ind_1['close'], label ='Actual')
# plt.plot(tek_ind_1['bb_high'], label ='BBHigh')
# plt.plot(tek_ind_1['bb_low'], label ='BBLow')
# plt.legend(loc='best')

In [14]:
# plt.figure(figsize=(15, 10))
# plt.plot(tek_ind_1['10day Ma'], label ='10day Ma')
# plt.plot(tek_ind_1['50day Ma'], label ='50day Ma')
# plt.plot(tek_ind_1['200day Ma'], label ='200day Ma')
# plt.legend(loc='best')

In [19]:
def prepare_data(data, lookback = 20):
    data = _check_null_values(data)
    values = pd.DataFrame(data.values.astype('float32'))
    values = _scale_data(values)
    values = _create_time_series(values)
    return _split_data(values)
    

def _check_null_values(data):
    if data.isnull().sum().sum() >0 :
        data = data.fillna(0) # filling null values with their previous
        print ('filled empty values')
    else:
        print ('No empty values')
    return data

def _scale_data(data):
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    return scaler.fit_transform(data)

def _create_time_series(data, lookback=20):
    timeseries = []
    for index in range(len(data) - lookback): 
        timeseries.append(data[index: index + lookback])
    timeseries = np.array(timeseries);
    return timeseries

def _split_data(data_raw, testSize = 0.1):
    data = np.array(data_raw);
    test_set_size = int(np.round(testSize*data.shape[0]));
    train_set_size = data.shape[0] - (test_set_size);
    
    x_train = data[:train_set_size,:-1,:]
    y_train = data[:train_set_size,-1,-1]
    
    x_test = data[train_set_size:,:-1,:]
    y_test = data[train_set_size:,-1,-1]
    
    return [x_train, y_train, x_test, y_test]

In [22]:
lookback = 20
x_train, y_train, x_test, y_test = prepare_data(tek_ind_1)


filled empty values


# TensorFlow

## LSTM

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

model_lstm = tf.keras.Sequential()
model_lstm.add(tf.keras.layers.LSTM(units=75, return_sequences =True,input_shape=(x_train.shape[1], x_train.shape[2])))
model_lstm.add(tf.keras.layers.LSTM(units=30, return_sequences =True))
model_lstm.add(tf.keras.layers.LSTM(units=30, return_sequences =True))

model_lstm.add(tf.keras.layers.Dense(units=1))
model_lstm.compile(loss='mae', optimizer='adam')
model_lstm.summary()

history_lstm = model_lstm.fit(x_train, y_train, epochs =100, batch_size=32, validation_data =(x_test, y_test), shuffle=False)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 19, 75)            29400     
_________________________________________________________________
lstm_1 (LSTM)                (None, 19, 30)            12720     
_________________________________________________________________
lstm_2 (LSTM)                (None, 19, 30)            7320      
_________________________________________________________________
dense (Dense)                (None, 19, 1)             31        
Total params: 49,471
Trainable params: 49,471
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100

In [None]:
# import matplotlib.pyplot as plt
# plt.figure(figsize=(10, 6))
# plt.plot(history_lstm.history['loss'], label ='train_loss', color='red')
# plt.plot(history_lstm.history['val_loss'], label ='test_loss', color='blue')
# plt.xlabel('epochs')
# plt.ylabel('loss')
# plt.legend(loc='best')

In [None]:
x_test.shape

In [None]:
y_pred_tf_lstm = model_lstm.predict(x_test)

y_pred_tf_lstm = y_pred_tf_lstm[:, -1, 0]

In [None]:
y_test.shape

In [None]:
y_pred_tf_lstm.shape

In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(15, 10))
plt.plot(y_pred_tf_lstm, label ='pred')
plt.plot(y_test, label ='actual')
plt.legend(loc='best')

In [None]:
# smaller test size
import matplotlib.pyplot as plt
plt.figure(figsize=(15, 10))
plt.plot(y_pred_tf_lstm, label ='pred')
plt.plot(y_test, label ='actual')
plt.legend(loc='best')

## GRU


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

model_gru = tf.keras.Sequential()
model_gru.add(tf.keras.layers.GRU(units=75, return_sequences =True,input_shape=(x_train.shape[1], x_train.shape[2])))
model_gru.add(tf.keras.layers.GRU(units=30, return_sequences =True))
model_gru.add(tf.keras.layers.GRU(units=30))
model_gru.add(tf.keras.layers.Dense(units=1))

model_gru.compile(loss='mae', optimizer='adam')
model_gru.summary()

history_gru = model_gru.fit(x_train, y_train, epochs =100, batch_size=32, validation_data =(x_test, y_test), shuffle=False)

In [None]:
# import matplotlib.pyplot as plt
# plt.figure(figsize=(10, 6))
# plt.plot(history_gru.history['loss'], label ='train_loss', color='red')
# plt.plot(history_gru.history['val_loss'], label ='test_loss', color='blue')
# plt.xlabel('epochs')
# plt.ylabel('loss')
# plt.legend(loc='best')

In [None]:
y_pred_tf_gru = model_gru.predict(x_test)
import matplotlib.pyplot as plt
plt.figure(figsize=(15, 10))
plt.plot(y_pred_tf_gru, label ='pred')
plt.plot(y_test, label ='actual')
plt.legend(loc='best')

In [None]:
# scaler.scale_
# normal_scale = 1/5.21225901e-05

# y_pred = y_pred * normal_scale
# y_test = y_test * normal_scale

# mean_y_test = y_test.mean()
# mean_y_pred = y_pred.mean()

# print(mean_y_test, mean_y_pred)
# accuracy = round((mean_y_test/mean_y_pred )*100, 2)
# accuracy

In [None]:
# X_train.shape

In [None]:
print (x_train.shape)
print (y_train.shape)
print (x_test.shape)
print (y_test.shape)

# Pytorch

In [40]:
input_dim = 22
hidden_dim = 32
num_layers = 2
output_dim = 1
num_epochs = 100

In [41]:
import torch
import torch.nn as nn

x_train_gru = torch.from_numpy(x_train).type(torch.Tensor)
x_test_gru = torch.from_numpy(x_test).type(torch.Tensor)
y_train_gru = torch.from_numpy(y_train).type(torch.Tensor)
y_test_gru = torch.from_numpy(y_test).type(torch.Tensor)


In [42]:
class GRU(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(GRU, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn) = self.gru(x, (h0.detach()))
        out = self.fc(out[:, -1, :]) 
        return out

In [43]:
model_pt_gru = GRU(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = torch.nn.MSELoss(reduction='mean')
optimiser = torch.optim.Adam(model_pt_gru.parameters(), lr=0.01)

In [44]:
import time
hist = np.zeros(num_epochs)

for t in range(num_epochs):
    y_train_pred = model_pt_gru(x_train_lstm)
    loss = criterion(y_train_pred, y_train_lstm)
    print("Epoch ", t, "Mean Squared Error: ", loss.item())
    hist[t] = loss.item()
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()
    
training_time = time.time()-start_time

NameError: name 'x_train_lstm' is not defined

In [None]:
y_pred_pt_gru = model_pt_gru(x_test_lstm)


In [None]:
y_pred_pt_gru.detach().numpy()

In [None]:
y_test_gru

In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(15, 10))
plt.plot(y_pred_pt_gru.detach().numpy(), label ='pred')
plt.plot(y_test_gru, label ='actual')
plt.legend(loc='best')