In [221]:
# IMPORTING LIBRARIES 
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier 

In [222]:
# starting with these parameters
n = 3
profit_taking = 0.005
p = 0.2

In [223]:
# READ IN DATA
df_second_ten = pd.read_csv("data/EUR_GBP_2015_2005.csv")
df_first_ten = pd.read_csv("data/EUR_GBP_2025_2015.csv")

df_original = pd.concat([df_second_ten, df_first_ten], ignore_index = True)

df_original["Date"] = pd.to_datetime(df_original["Date"], format="%m/%d/%y")

df_original = df_original.sort_values(by =  "Date", ascending=True)

df_original = df_original.rename(columns = {'Price': 'Close'})
df_original = df_original[['Date', 'Close', 'Open', 'High', 'Low', 'Change %']]
df = df_original[['Date', 'Close', 'Open', 'High', 'Low', 'Change %']].copy()
df.head()

Unnamed: 0,Date,Close,Open,High,Low,Change %
2629,2005-09-16,0.6765,0.6768,0.679,0.6746,0.00%
2628,2005-09-19,0.6735,0.6747,0.6759,0.6723,-0.44%
2627,2005-09-20,0.6736,0.6734,0.6753,0.6725,0.01%
2626,2005-09-21,0.675,0.6737,0.6763,0.6727,0.21%
2625,2005-09-22,0.6786,0.675,0.68,0.6743,0.53%


In [224]:
df["target"] = (df["High"] > (profit_taking+1)*df["Open"]).astype(int)

In [225]:
# CREATE PRIOR n DAYS FEATURE

for before in range(1, n+1):
    df[f"Close_{before}_before"] = df["Close"].shift(before)
    df[f"Open_{before}_before"] = df["Open"].shift(before)
    df[f"High_{before}_before"] = df["High"].shift(before)
    df[f"Low_{before}_before"] = df["Low"].shift(before)

df = df.dropna()
df = df.drop(columns = ["Date", "Change %", "Close", "Open", "High", "Low"])
df.head()

Unnamed: 0,target,Close_1_before,Open_1_before,High_1_before,Low_1_before,Close_2_before,Open_2_before,High_2_before,Low_2_before,Close_3_before,Open_3_before,High_3_before,Low_3_before
2626,0,0.6736,0.6734,0.6753,0.6725,0.6735,0.6747,0.6759,0.6723,0.6765,0.6768,0.679,0.6746
2625,1,0.675,0.6737,0.6763,0.6727,0.6736,0.6734,0.6753,0.6725,0.6735,0.6747,0.6759,0.6723
2624,0,0.6786,0.675,0.68,0.6743,0.675,0.6737,0.6763,0.6727,0.6736,0.6734,0.6753,0.6725
2623,0,0.6778,0.6788,0.6801,0.6769,0.6786,0.675,0.68,0.6743,0.675,0.6737,0.6763,0.6727
2622,0,0.6787,0.6791,0.6801,0.6774,0.6778,0.6788,0.6801,0.6769,0.6786,0.675,0.68,0.6743


In [226]:
target = df["target"]
df = df.drop(columns = "target")

print(target.shape)
print(df.shape)
print(target.value_counts())

(5259,)
(5259, 12)
target
0    3834
1    1425
Name: count, dtype: int64


In [227]:
# CREATING TRAINING, VALIDATION, and TESTING SETS
train_ratio = 0.75
validation_ratio = 0.15
test_ratio = 0.10

# ensure that the ratios sum to 1.0
epsilon = 1e-10
assert(1 - epsilon <= train_ratio + validation_ratio + test_ratio <= 1 + epsilon)

x_train0, x_test, y_train0, y_test = sklearn.model_selection.train_test_split(
    df,
    target,
    test_size=test_ratio,
    random_state=42,
    )
print(f"len(x_train0)={len(x_train0)}")
print(f"len(x_test)={len(x_test)}")

x_train, x_val, y_train, y_val = sklearn.model_selection.train_test_split(
    x_train0,
    y_train0,
    test_size=validation_ratio/(train_ratio + validation_ratio),
    random_state=0,
    )
print(f"len(x_train)={len(x_train)}")
print(f"len(x_val)={len(x_val)}")

len(x_train0)=4733
len(x_test)=526
len(x_train)=3944
len(x_val)=789


In [228]:
# Scaling the data
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.fit_transform(x_val)
x_test = scaler.transform(x_test)

In [229]:
# MODEL TRAINING
model = sklearn.neural_network.MLPClassifier(
    hidden_layer_sizes=[64, 128],
    activation='relu',
    alpha=1e-6,
    max_iter=1000,
    tol=1e-6,
    verbose=1,
    )

model.fit(x_train, y_train)

validation_accuracy = model.score(x_val, y_val)
print(f"validation_accuracy={validation_accuracy:0.4f}")
train_accuracy = model.score(x_train, y_train)
print(f"train_accuracy={train_accuracy:0.4f}")

Iteration 1, loss = 0.63030189
Iteration 2, loss = 0.59132665
Iteration 3, loss = 0.58376629
Iteration 4, loss = 0.58031711
Iteration 5, loss = 0.57980032
Iteration 6, loss = 0.57922081
Iteration 7, loss = 0.57747129
Iteration 8, loss = 0.57772909
Iteration 9, loss = 0.57797080
Iteration 10, loss = 0.57627272
Iteration 11, loss = 0.57490871
Iteration 12, loss = 0.57410706
Iteration 13, loss = 0.57382596
Iteration 14, loss = 0.57405579
Iteration 15, loss = 0.57524901
Iteration 16, loss = 0.57446963
Iteration 17, loss = 0.57019424
Iteration 18, loss = 0.56980645
Iteration 19, loss = 0.56850030
Iteration 20, loss = 0.56936731
Iteration 21, loss = 0.56669813
Iteration 22, loss = 0.56632852
Iteration 23, loss = 0.56609607
Iteration 24, loss = 0.56360908
Iteration 25, loss = 0.56340245
Iteration 26, loss = 0.56258236
Iteration 27, loss = 0.56269530
Iteration 28, loss = 0.56251228
Iteration 29, loss = 0.56024373
Iteration 30, loss = 0.56337926
Iteration 31, loss = 0.55924786
Iteration 32, los

In [230]:
if True:
    model.fit(x_train0, y_train0)

    test_accuracy = model.score(x_test, y_test)
    print(f"test_accuracy={test_accuracy}")

    y_true = y_test.values.ravel()
    probs_test = model.predict_proba(x_test)[:, 1]
    preds_test = model.predict(x_test)

Iteration 1, loss = 0.59061936
Iteration 2, loss = 0.59006619
Iteration 3, loss = 0.59106609
Iteration 4, loss = 0.58904024
Iteration 5, loss = 0.58895625
Iteration 6, loss = 0.59068202
Iteration 7, loss = 0.58953734
Iteration 8, loss = 0.58844357
Iteration 9, loss = 0.58871882
Iteration 10, loss = 0.58862456
Iteration 11, loss = 0.58862245
Iteration 12, loss = 0.58814273
Iteration 13, loss = 0.58842906
Iteration 14, loss = 0.58878456
Iteration 15, loss = 0.58814845
Iteration 16, loss = 0.58774882
Iteration 17, loss = 0.58808785
Iteration 18, loss = 0.58778063
Iteration 19, loss = 0.58827721
Iteration 20, loss = 0.58783030
Iteration 21, loss = 0.58776121
Iteration 22, loss = 0.58837188
Iteration 23, loss = 0.58859817
Iteration 24, loss = 0.58854901
Iteration 25, loss = 0.58736943
Iteration 26, loss = 0.58697975
Iteration 27, loss = 0.58870083
Iteration 28, loss = 0.58729601
Iteration 29, loss = 0.58724351
Iteration 30, loss = 0.58653455
Iteration 31, loss = 0.58665175
Iteration 32, los



In [231]:
if True:
    df["prob"] = model.predict_proba(df)[:, 1]
    df = df.reset_index()      
    df_original = df_original.reset_index()
    target = target.reset_index()
    df = df.reset_index()      
    df_original = df_original.reset_index()
    target = target.reset_index()
    df_evaluation["enter"] = (df_evaluation["prob"] >= p).astype(int)

In [232]:
if True:
    df_evaluation["pnl_if_hit"] = profit_taking
    df_evaluation["pnl_if_miss"] = (
        (df_evaluation["Close"] - df_evaluation["Open"]) / df_evaluation["Open"]
    )

    df_evaluation["pnl"] = np.where(
        df_evaluation["enter"] == 1,                        
        np.where(
            df_evaluation["target"] == 1,                   
            df_evaluation["pnl_if_hit"],                    
            df_evaluation["pnl_if_miss"]                     
        ),
        0                                                      
    )

In [233]:
if True:
    total_return = df_evaluation["pnl"].sum()
    num_trades = df_evaluation["enter"].sum()
    hit_rate = df_evaluation.loc[df_evaluation["enter"] == 1, "target"].mean()
    avg_return_per_trade = df_evaluation.loc[df_evaluation["enter"] == 1, "pnl"].mean()

    print("Total return:", total_return)
    print("Number of trades:", num_trades)
    print("Hit rate:", hit_rate)
    print("Avg return per trade:", avg_return_per_trade)

Total return: -0.4214966853410814
Number of trades: 1110
Hit rate: 0.3045045045045045
Avg return per trade: -0.0003797267435505239


In [234]:
df_evaluation['prob'].describe()

count    5259.000000
mean        0.268316
std         0.002276
min         0.262428
25%         0.267314
50%         0.269119
75%         0.269861
max         0.272740
Name: prob, dtype: float64