In [1]:
from OptimizedDNN import *
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import tensorflow as tf
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.regularizers import l1, l2
from keras.optimizers import Adam
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder

In [2]:
data = pd.read_csv('../data/EURUSD_HOUR.csv', parse_dates=['time'], index_col='time')
dataset = data.rename(columns={'price': 'Price'})
print(dataset)

                       Price
time                        
2015-05-15 13:00:00  1.13546
2015-05-15 14:00:00  1.14378
2015-05-15 15:00:00  1.14282
2015-05-15 16:00:00  1.14320
2015-05-15 17:00:00  1.14368
...                      ...
2023-08-01 10:00:00  1.09822
2023-08-01 11:00:00  1.09756
2023-08-01 12:00:00  1.09565
2023-08-01 13:00:00  1.09752
2023-08-01 13:00:00  1.09753

[51790 rows x 1 columns]


In [3]:
df = dataset.copy()
WINDOW = 50
FAST_SMA = 75
SLOW_SMA = 150
FAST_EMA = 12
SLOW_EMA = 26
SIGNAL_EMA = 9
RSI_WINDOW = 14

# Log Returns
df['Returns'] = np.log(df['Price'] / df['Price'].shift(1))
# Direction for class weight balancing to eliminate buy bias
df['Direction'] = np.where(df['Returns'] > 0, 'buy', 'sell')
# MACD Growth Indicator
macd = df['Price'].ewm(span=FAST_EMA, adjust=False).mean() - df['Price'].ewm(span=SLOW_EMA, adjust=False).mean()
signal = macd.ewm(span=SIGNAL_EMA, adjust=False).mean()
df['MACD'] = macd - signal
# SMA Crossover with 75-150 Fast SMA-Slow SMA split
df['SMA Crossover'] = df['Price'].rolling(FAST_SMA).mean() - df['Price'].rolling(SLOW_SMA).mean()
# Mean Reversion (similar to Bollinger Bands) with 50-period window
df['Mean Reversion'] = (df['Price'] - df['Price'].rolling(WINDOW).mean()) / df['Price'].rolling(WINDOW).std()
# Rolling Min/Max normalization with current price
df['Rolling Min'] = (df['Price'].rolling(WINDOW).min() / df['Price']) - 1
df['Rolling Max'] = (df['Price'].rolling(WINDOW).max() / df['Price']) - 1
# Momentum
change = df['Price'].diff()
df['RSI'] = 100 - (100 / (1 + (change.mask(change < 0, 0.0).rolling(RSI_WINDOW).mean() / -change.mask(change > 0, -0.0).rolling(RSI_WINDOW).mean())))
# Volatility
df['Volatility'] = df['Returns'].rolling(WINDOW).std()

In [4]:
encoder = LabelEncoder()
df['Direction'] = encoder.fit_transform(df['Direction'])

In [5]:
df.dropna(inplace=True)
df

Unnamed: 0_level_0,Price,Returns,Direction,MACD,SMA Crossover,Mean Reversion,Rolling Min,Rolling Max,RSI,Volatility
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-05-25 16:00:00,1.09854,0.001330,0,0.000466,-0.010037,-0.937949,-0.001329,0.018861,48.902196,0.001643
2015-05-25 17:00:00,1.09747,-0.000974,1,0.000457,-0.009924,-1.037918,-0.000355,0.019855,44.183950,0.001642
2015-05-25 18:00:00,1.09794,0.000428,0,0.000479,-0.009777,-0.937183,-0.000783,0.019418,47.690941,0.001641
2015-05-25 19:00:00,1.09798,0.000036,0,0.000492,-0.009641,-0.895203,-0.000820,0.019381,47.032374,0.001637
2015-05-25 20:00:00,1.09775,-0.000209,1,0.000480,-0.009522,-0.889358,-0.000610,0.019595,36.050157,0.001625
...,...,...,...,...,...,...,...,...,...,...
2023-08-01 10:00:00,1.09822,0.000619,0,-0.000119,-0.003103,-1.479394,-0.000783,0.005117,37.888199,0.000724
2023-08-01 11:00:00,1.09756,-0.000601,1,-0.000117,-0.003214,-1.851224,-0.000073,0.005722,33.618234,0.000707
2023-08-01 12:00:00,1.09565,-0.001742,1,-0.000224,-0.003375,-2.662682,0.000000,0.007475,24.221453,0.000700
2023-08-01 13:00:00,1.09752,0.001705,0,-0.000152,-0.003527,-1.623451,-0.001704,0.005758,39.306931,0.000713


In [6]:
lags = 8
columns = []
features = ['Returns', 'Direction', 'MACD', 'SMA Crossover', 'Mean Reversion', 'Rolling Min', 'Rolling Max', 'RSI', 'Volatility']

for feature in features:
    for lag in range(1, lags + 1):
        column = '{}_lag_{}'.format(feature, lag)
        df[column] = df[feature].shift(lag)
        columns.append(column)
df.dropna(inplace=True)
df

Unnamed: 0_level_0,Price,Returns,Direction,MACD,SMA Crossover,Mean Reversion,Rolling Min,Rolling Max,RSI,Volatility,...,RSI_lag_7,RSI_lag_8,Volatility_lag_1,Volatility_lag_2,Volatility_lag_3,Volatility_lag_4,Volatility_lag_5,Volatility_lag_6,Volatility_lag_7,Volatility_lag_8
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-05-26 00:00:00,1.09438,-0.002254,1,0.000175,-0.008930,-1.188868,0.000000,0.022734,33.910387,0.001643,...,44.183950,48.902196,0.001619,0.001616,0.001616,0.001625,0.001637,0.001641,0.001642,0.001643
2015-05-26 01:00:00,1.09386,-0.000475,1,0.000015,-0.008810,-1.207758,0.000000,0.023221,31.139489,0.001642,...,47.690941,44.183950,0.001643,0.001619,0.001616,0.001616,0.001625,0.001637,0.001641,0.001642
2015-05-26 02:00:00,1.09500,0.001042,0,0.000003,-0.008707,-1.014690,-0.001041,0.022155,32.854406,0.001645,...,47.032374,47.690941,0.001642,0.001643,0.001619,0.001616,0.001616,0.001625,0.001637,0.001641
2015-05-26 03:00:00,1.09428,-0.000658,1,-0.000036,-0.008601,-1.069777,-0.000384,0.022828,35.288066,0.001640,...,36.050157,47.032374,0.001645,0.001642,0.001643,0.001619,0.001616,0.001616,0.001625,0.001637
2015-05-26 04:00:00,1.09362,-0.000603,1,-0.000084,-0.008519,-1.118152,0.000000,0.023445,32.524272,0.001633,...,54.585799,36.050157,0.001640,0.001645,0.001642,0.001643,0.001619,0.001616,0.001616,0.001625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01 10:00:00,1.09822,0.000619,0,-0.000119,-0.003103,-1.479394,-0.000783,0.005117,37.888199,0.000724,...,9.767442,20.600273,0.000726,0.000770,0.000792,0.000787,0.000787,0.000784,0.000782,0.000782
2023-08-01 11:00:00,1.09756,-0.000601,1,-0.000117,-0.003214,-1.851224,-0.000073,0.005722,33.618234,0.000707,...,19.223986,9.767442,0.000724,0.000726,0.000770,0.000792,0.000787,0.000787,0.000784,0.000782
2023-08-01 12:00:00,1.09565,-0.001742,1,-0.000224,-0.003375,-2.662682,0.000000,0.007475,24.221453,0.000700,...,33.105802,19.223986,0.000707,0.000724,0.000726,0.000770,0.000792,0.000787,0.000787,0.000784
2023-08-01 13:00:00,1.09752,0.001705,0,-0.000152,-0.003527,-1.623451,-0.001704,0.005758,39.306931,0.000713,...,37.964775,33.105802,0.000700,0.000707,0.000724,0.000726,0.000770,0.000792,0.000787,0.000787


In [7]:
split = int(len(df) * 0.8)
training_set = df.iloc[:split].copy()
test_set = df.iloc[split:].copy()

mu = training_set.mean()
sigma = training_set.std()
standardized_training_set = (training_set - mu) / sigma

test_mu = test_set.mean()
test_sigma = test_set.std()
standardized_test_set = (test_set - test_mu) / test_sigma

X_train = standardized_training_set[columns]
X_test = standardized_test_set[columns]
y_train = training_set['Direction']
y_test = test_set['Direction']

y_train

time
2015-05-26 00:00:00    1
2015-05-26 01:00:00    1
2015-05-26 02:00:00    0
2015-05-26 03:00:00    1
2015-05-26 04:00:00    1
                      ..
2021-12-13 19:00:00    1
2021-12-13 20:00:00    1
2021-12-13 21:00:00    0
2021-12-13 22:00:00    1
2021-12-13 23:00:00    0
Name: Direction, Length: 41306, dtype: int32

In [8]:
param_grid = {
    'hl': [1, 2, 3],
    'hu': [50, 100, 150],
    'use_dropout': [True, False],
    'dropout_rate': [0.2, 0.3, 0.4],
    'regularize': [True, False],
    'reg': [l1(0.0001), l2(0.0001)],
}

In [9]:
grid_search = GridSearchCV(
    estimator=create_keras_model(input_dim=X_train.shape[1]),
    param_grid=param_grid,
    cv=StratifiedKFold(n_splits=3),  # Use StratifiedKFold since it's a classification task
    scoring='accuracy',
    verbose=1,
    n_jobs=-1
)

In [10]:
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 216 candidates, totalling 648 fits


  X, y = self._initialize(X, y)


In [14]:
best_params = grid_search.best_params_

In [15]:
accuracy = best_model.score(X_test, y_test)
print("Best Parameters:", best_params)

Best Parameters: {'dropout_rate': 0.2, 'hl': 3, 'hu': 150, 'reg': <keras.src.regularizers.L2 object at 0x00000212DFEC8D50>, 'regularize': True, 'use_dropout': True}
