## Forex Predictor LSTM

In [1]:
import keras.backend as K
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.callbacks import LearningRateScheduler
from keras.callbacks import ModelCheckpoint
from keras.layers import *
from keras.models import Sequential
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

from config import *

df = pd.read_csv("EURUSD_15m_BID_01.01.2010-31.12.2016.csv")
print(df.count())

# Rename bid OHLC columns
df.rename(columns={'Time': 'timestamp', 'Open': 'open', 'Close': 'close',
                   'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'}, inplace=True)
df['timestamp'] = pd.to_datetime(df['timestamp'], infer_datetime_format=True)
df.set_index('timestamp', inplace=True)
df = df.astype(float)

# Add additional features
df['momentum'] = df['volume'] * (df['open'] - df['close'])
df['avg_price'] = (df['low'] + df['high']) / 2
# df['range'] = df['high'] - df['low']
df['ohlc_price'] = (df['low'] + df['high'] + df['open'] + df['close']) / 4
df['oc_diff'] = df['open'] - df['close']

print(df.head())


def create_dataset(dataset, look_back=20):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        a = dataset[i:(i + look_back)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    return np.array(dataX), np.array(dataY)


# Scale and create datasets
target_index = df.columns.tolist().index('close')
high_index = df.columns.tolist().index('high')
low_index = df.columns.tolist().index('low')
dataset = df.values.astype('float32')

# Scale the data
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# Create y_scaler to inverse it later
y_scaler = MinMaxScaler(feature_range=(0, 1))
t_y = df['close'].values.astype('float32')
t_y = np.reshape(t_y, (-1, 1))
y_scaler = y_scaler.fit(t_y)

X, y = create_dataset(dataset, look_back=50)
y = y[:, target_index]

train_size = int(len(X) * 0.99)
trainX = X[:train_size]
trainY = y[:train_size]
testX = X[train_size:]
testY = y[train_size:]

Using TensorFlow backend.


Time      245444
Open      245444
High      245444
Low       245444
Close     245444
Volume    245444
dtype: int64
                        open     high      low    close       volume  \
timestamp                                                              
2010-01-01 00:00:00  1.43283  1.43293  1.43224  1.43293  608600007.1   
2010-01-01 00:15:00  1.43285  1.43295  1.43229  1.43275  535600003.2   
2010-01-01 00:30:00  1.43280  1.43303  1.43239  1.43281  436299999.2   
2010-01-01 00:45:00  1.43285  1.43294  1.43229  1.43276  614299997.3   
2010-01-01 01:00:00  1.43287  1.43292  1.43206  1.43282  705300008.8   

                         momentum  avg_price  ohlc_price  oc_diff  
timestamp                                                          
2010-01-01 00:00:00 -60860.000710   1.432585    1.432732 -0.00010  
2010-01-01 00:15:00  53560.000320   1.432620    1.432710  0.00010  
2010-01-01 00:30:00  -4362.999992   1.432710    1.432758 -0.00001  
2010-01-01 00:45:00  55286.999757   1.43

## Model creation

In [6]:
model = Sequential()
model.add(
    Bidirectional(LSTM(90, input_shape=(X.shape[1], X.shape[2]),
                       return_sequences=True),
                  merge_mode='sum',
                  weights=None,
                  input_shape=(X.shape[1], X.shape[2])))
model.add(LSTM(30, return_sequences=True))
model.add(LSTM(20, return_sequences=True))
model.add(LSTM(10, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(4, return_sequences=False))
model.add(Dense(4, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='relu'))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae', 'mse'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_3 (Bidirection (None, 50, 90)            72000     
_________________________________________________________________
lstm_10 (LSTM)               (None, 50, 30)            14520     
_________________________________________________________________
lstm_11 (LSTM)               (None, 50, 20)            4080      
_________________________________________________________________
lstm_12 (LSTM)               (None, 50, 10)            1240      
_________________________________________________________________
dropout_3 (Dropout)          (None, 50, 10)            0         
_________________________________________________________________
lstm_13 (LSTM)               (None, 4)                 240       
_________________________________________________________________
dense_5 (Dense)              (None, 4)                 20        
__________

In [7]:
checkpoint = ModelCheckpoint(bi_rnn_weights,
                                 monitor='val_mean_squared_error',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
callbacks_list = [checkpoint]

In [8]:
history = model.fit(trainX, trainY, epochs=10, batch_size=1024, verbose=1, callbacks=callbacks_list,
                        validation_split=0.1)

Train on 218645 samples, validate on 24294 samples
Epoch 1/10

Epoch 00001: val_mean_squared_error improved from inf to 0.03145, saving model to bi_rnn_trained_models/weights.best.hdf5
Epoch 2/10

Epoch 00002: val_mean_squared_error did not improve from 0.03145
Epoch 3/10

Epoch 00003: val_mean_squared_error did not improve from 0.03145
Epoch 4/10

Epoch 00004: val_mean_squared_error did not improve from 0.03145
Epoch 5/10

Epoch 00005: val_mean_squared_error did not improve from 0.03145
Epoch 6/10

Epoch 00006: val_mean_squared_error did not improve from 0.03145
Epoch 7/10

Epoch 00007: val_mean_squared_error did not improve from 0.03145
Epoch 8/10

Epoch 00008: val_mean_squared_error did not improve from 0.03145
Epoch 9/10

Epoch 00009: val_mean_squared_error did not improve from 0.03145
Epoch 10/10

Epoch 00010: val_mean_squared_error did not improve from 0.03145


In [None]:
model.load_weights(bi_rnn_weights)

## Benchmark

In [None]:
pred = model.predict(testX)
pred = y_scaler.inverse_transform(pred)
close = y_scaler.inverse_transform(np.reshape(testY, (testY.shape[0], 1)))
predictions = pd.DataFrame()
predictions['predicted'] = pd.Series(np.reshape(pred, (pred.shape[0])))
predictions['close'] = pd.Series(np.reshape(close, (close.shape[0])))
predictions['diff'] = predictions['predicted'] - predictions['close']

In [None]:
p = df[-pred.shape[0]:].copy()
predictions.index = p.index
predictions = predictions.astype(float)
predictions = predictions.merge(p[['low', 'high']], right_index=True, left_index=True)

In [None]:
flattened = pd.DataFrame(predictions.to_records())
flattened['forex_pair']="EURUSD"
flattened['timestamp']=flattened.timestamp.apply(lambda x: x.strftime('%Y%m%d %H:%M:%S'))
flattened['decision']=""


In [None]:
flattened.loc[flattened['diff'] > 0, 'decision'] = 'sell'
flattened.loc[flattened['diff'] == 0, 'decision'] = 'hold'
flattened.loc[flattened['diff'] < 0, 'decision'] = 'buy'


In [None]:
flattened.head(10)

In [None]:
result=flattened[['forex_pair','timestamp','decision']]


In [None]:
data=result.to_json(orient='records')

In [None]:
import io, json
with io.open('predictions.json', 'w', encoding='utf-8') as f:
  f.write(json.dumps(data, ensure_ascii=False))