In [652]:
# IMPORTING LIBRARIES 
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier 
import sklearn.datasets
import sklearn.preprocessing
import sklearn.random_projection
import sklearn.neighbors

In [653]:
# starting with these parameters
n = 21
profit_taking = 0.0020
p = 0.66

In [654]:
# READ IN DATA
df_second_ten = pd.read_csv("data/EUR_GBP_2015_2005.csv")
df_first_ten = pd.read_csv("data/EUR_GBP_2025_2015.csv")

df_original = pd.concat([df_second_ten, df_first_ten], ignore_index = True)

df_original["Date"] = pd.to_datetime(df_original["Date"], format="%m/%d/%y")

df_original = df_original.sort_values(by =  "Date", ascending=True)

df_original = df_original.rename(columns = {'Price': 'Close'})
df_original = df_original[['Date', 'Close', 'Open', 'High', 'Low', 'Change %']]
df = df_original[['Date', 'Close', 'Open', 'High', 'Low', 'Change %']].copy()
df.head()

Unnamed: 0,Date,Close,Open,High,Low,Change %
2629,2005-09-16,0.6765,0.6768,0.679,0.6746,0.00%
2628,2005-09-19,0.6735,0.6747,0.6759,0.6723,-0.44%
2627,2005-09-20,0.6736,0.6734,0.6753,0.6725,0.01%
2626,2005-09-21,0.675,0.6737,0.6763,0.6727,0.21%
2625,2005-09-22,0.6786,0.675,0.68,0.6743,0.53%


In [655]:
df["target"] = (df["High"] > (profit_taking+1)*df["Open"]).astype(int)

In [656]:
# CREATE PRIOR n DAYS FEATURE

for before in range(1, n+1):
    df[f"Close_{before}_before"] = df["Close"].shift(before)
    df[f"Open_{before}_before"] = df["Open"].shift(before)
    df[f"High_{before}_before"] = df["High"].shift(before)
    df[f"Low_{before}_before"] = df["Low"].shift(before)

df = df.dropna()

In [663]:
train_df   = df[(df['Date'] >= '2016-01-01') & (df['Date'] < '2020-01-01')]
val_df   = df[(df['Date'] >= '2020-01-01') & (df['Date'] < '2022-01-01')]
test_df  = df[df['Date'] >= '2022-01-01']

x_train = train_df.drop(columns=['target', 'Date', 'Change %', 'Close', 'Open', 'High', 'Low'])
y_train = train_df['target']

x_val = val_df.drop(columns=['target', 'Date', 'Change %', 'Close', 'Open', 'High', 'Low'])
y_val = val_df['target']

x_test = test_df.drop(columns=['target', 'Date', 'Change %', 'Close', 'Open', 'High', 'Low'])
y_test = test_df['target']

In [664]:
print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

(1043, 84)
(523, 84)
(1011, 84)


In [665]:
# Scaling the data
scaler = StandardScaler()
scaler.fit(x_train)   

x_train = scaler.transform(x_train)
x_val   = scaler.transform(x_val)
x_test  = scaler.transform(x_test)


In [666]:
# MODEL TRAINING
model1 = sklearn.neural_network.MLPClassifier(
    hidden_layer_sizes=[128, 128],
    activation='relu',
    alpha=1e-3,
    max_iter=10000,
    tol=1e-6,
    verbose=1, 
)

model1.fit(x_train, y_train)

validation_accuracy = model1.score(x_val, y_val)
print(f"validation_accuracy1={validation_accuracy:0.4f}")
train_accuracy = model1.score(x_train, y_train)
print(f"train_accuracy1={train_accuracy:0.4f}")

#for k in [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]:
    
    #model2 = sklearn.neighbors.KNeighborsClassifier(
    #n_neighbors=k,
    #weights='uniform',
    #algorithm='ball_tree',
    #)

    #model2.fit(x_train, y_train)

    #validation_accuracy = model2.score(x_val, y_val)
    #print(k, f"validation_accuracy2={validation_accuracy:0.4f}")
    #train_accuracy = model2.score(x_train, y_train)
    #print(k, f"train_accuracy2={train_accuracy:0.4f}")


    
model2 = sklearn.neighbors.KNeighborsClassifier(
    n_neighbors=3,
    weights='uniform',
    algorithm='ball_tree',
    )

model2.fit(x_train, y_train)

validation_accuracy = model2.score(x_val, y_val)
print(f"validation_accuracy2={validation_accuracy:0.4f}")
train_accuracy = model2.score(x_train, y_train)
print(f"train_accuracy2={train_accuracy:0.4f}")

Iteration 1, loss = 0.66003072
Iteration 2, loss = 0.61549174
Iteration 3, loss = 0.60671859
Iteration 4, loss = 0.59340397
Iteration 5, loss = 0.58732321
Iteration 6, loss = 0.58048856
Iteration 7, loss = 0.58401356
Iteration 8, loss = 0.57647641
Iteration 9, loss = 0.57452040
Iteration 10, loss = 0.57310816
Iteration 11, loss = 0.57264472
Iteration 12, loss = 0.57252680
Iteration 13, loss = 0.57267629
Iteration 14, loss = 0.57191362
Iteration 15, loss = 0.57084916
Iteration 16, loss = 0.57319400
Iteration 17, loss = 0.56365881
Iteration 18, loss = 0.56631577
Iteration 19, loss = 0.56296689
Iteration 20, loss = 0.56333461
Iteration 21, loss = 0.56087272
Iteration 22, loss = 0.56273591
Iteration 23, loss = 0.56558686
Iteration 24, loss = 0.56141786
Iteration 25, loss = 0.55710120
Iteration 26, loss = 0.55438550
Iteration 27, loss = 0.55706885
Iteration 28, loss = 0.55718008
Iteration 29, loss = 0.56449533
Iteration 30, loss = 0.56836403
Iteration 31, loss = 0.55378288
Iteration 32, los

In [661]:
if True:
    test_accuracy = model2.score(x_test, y_test)
    print(f"test_accuracy={test_accuracy}")

    y_true = y_test.values.ravel()
    probs_test = model2.predict_proba(x_test)[:, 1]
    preds_test = model2.predict(x_test)

    test_accuracy = model1.score(x_test, y_test)
    print(f"test_accuracy={test_accuracy}")

    y_true = y_test.values.ravel()
    probs_test = model1.predict_proba(x_test)[:, 1]
    preds_test = model1.predict(x_test)

test_accuracy=0.5608308605341247
test_accuracy=0.5539070227497527


In [668]:
print(df_original.columns)
print(target.columns)

Index(['level_0', 'index', 'Date', 'Close', 'Open', 'High', 'Low', 'Change %'], dtype='object')
Index(['level_0', 'index', 'target'], dtype='object')


In [669]:
if True:

    # 1. Build feature matrix exactly like training
    drop_cols = ['target','Date','Change %','Close','Open','High','Low']
    X_all = df.drop(columns=drop_cols)

    # 2. Apply the same scaler used earlier
    X_all_scaled = scaler.transform(X_all)

    # 3. Compute probabilities safely
    df_probs = pd.DataFrame({
        'index': df.index,
        'prob': model1.predict_proba(X_all_scaled)[:, 1]
    })

    # ⚠️ --- Add these 2 lines to fix the error ---
    df_original = df_original.drop(columns=['level_0','index'], errors='ignore')
    target      = target.drop(columns=['level_0','index'], errors='ignore')
    # --------------------------------------------

    # 4. Reset original dfs for merging
    df_original = df_original.reset_index()
    target      = target.reset_index()

    # 5. Merge back
    df_evaluation = df_original.merge(df_probs, on='index', how='left')
    df_evaluation = df_evaluation.merge(target, on='index', how='left')

    # 6. Compute enter column
    df_evaluation["enter"] = (df_evaluation["prob"] >= p).astype(int)



In [670]:
if True:
    df_evaluation["pnl_if_hit"] = profit_taking
    df_evaluation["pnl_if_miss"] = (
        (df_evaluation["Close"] - df_evaluation["Open"]) / df_evaluation["Open"]
    )

    df_evaluation["pnl"] = np.where(
        df_evaluation["enter"] == 1,                        
        np.where(
            df_evaluation["target"] == 1,                   
            df_evaluation["pnl_if_hit"],                    
            df_evaluation["pnl_if_miss"]                     
        ),
        0                                                      
    )

In [671]:
if True:
    total_return = df_evaluation["pnl"].sum()
    num_trades = df_evaluation["enter"].sum()
    hit_rate = df_evaluation.loc[df_evaluation["enter"] == 1, "target"].mean()
    avg_return_per_trade = df_evaluation.loc[df_evaluation["enter"] == 1, "pnl"].mean()

    print("Total return:", total_return)
    print("Number of trades:", num_trades)
    print("Hit rate:", hit_rate)
    print("Avg return per trade:", avg_return_per_trade)

Total return: 4.332270010450365
Number of trades: 3620
Hit rate: 0.6526696329254728
Avg return per trade: 0.0011967596713951284


In [672]:
df_evaluation['prob'].describe()

count    5241.000000
mean        0.721483
std         0.322450
min         0.000028
25%         0.550579
50%         0.867016
75%         0.978962
max         1.000000
Name: prob, dtype: float64