In [1]:
# IMPORTING LIBRARIES 
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier 
import sklearn.datasets
import sklearn.preprocessing
import sklearn.random_projection
import sklearn.neighbors

In [2]:
# starting with these parameters
n = 21
profit_taking = 0.0020
p = 0.68

In [3]:
# READ IN DATA
df_second_ten = pd.read_csv("data/EUR_GBP_2015_2005.csv")
df_first_ten = pd.read_csv("data/EUR_GBP_2025_2015.csv")

df_original = pd.concat([df_second_ten, df_first_ten], ignore_index = True)

df_original["Date"] = pd.to_datetime(df_original["Date"], format="%m/%d/%y")

df_original = df_original.sort_values(by =  "Date", ascending=True)

df_original = df_original.rename(columns = {'Price': 'Close'})
df_original = df_original[['Date', 'Close', 'Open', 'High', 'Low', 'Change %']]
df = df_original[['Date', 'Close', 'Open', 'High', 'Low', 'Change %']].copy()
df.head()

Unnamed: 0,Date,Close,Open,High,Low,Change %
2629,2005-09-16,0.6765,0.6768,0.679,0.6746,0.00%
2628,2005-09-19,0.6735,0.6747,0.6759,0.6723,-0.44%
2627,2005-09-20,0.6736,0.6734,0.6753,0.6725,0.01%
2626,2005-09-21,0.675,0.6737,0.6763,0.6727,0.21%
2625,2005-09-22,0.6786,0.675,0.68,0.6743,0.53%


In [4]:
df["target"] = (df["High"] > (profit_taking+1)*df["Open"]).astype(int)
target = df[["target"]]

In [5]:
# CREATE PRIOR n DAYS FEATURE

for before in range(1, n+1):
    df[f"Close_{before}_before"] = df["Close"].shift(before)
    df[f"Open_{before}_before"] = df["Open"].shift(before)
    df[f"High_{before}_before"] = df["High"].shift(before)
    df[f"Low_{before}_before"] = df["Low"].shift(before)

df = df.dropna()

In [6]:
train_df   = df[(df['Date'] >= '2016-01-01') & (df['Date'] < '2020-01-01')]
val_df   = df[(df['Date'] >= '2020-01-01') & (df['Date'] < '2022-01-01')]
test_df  = df[df['Date'] >= '2022-01-01']

x_train = train_df.drop(columns=['target', 'Date', 'Change %', 'Close', 'Open', 'High', 'Low'])
y_train = train_df['target']

x_val = val_df.drop(columns=['target', 'Date', 'Change %', 'Close', 'Open', 'High', 'Low'])
y_val = val_df['target']

x_test = test_df.drop(columns=['target', 'Date', 'Change %', 'Close', 'Open', 'High', 'Low'])
y_test = test_df['target']

In [7]:
print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

(1043, 84)
(523, 84)
(1011, 84)


In [8]:
# Scaling the data
scaler = StandardScaler()
scaler.fit(x_train)   

x_train = scaler.transform(x_train)
x_val   = scaler.transform(x_val)
x_test  = scaler.transform(x_test)


In [9]:
# MODEL TRAINING
model1 = sklearn.neural_network.MLPClassifier(
    hidden_layer_sizes=[128, 128],
    activation='relu',
    alpha=1e-3,
    max_iter=10000,
    tol=1e-6,
    verbose=1, 
)

model1.fit(x_train, y_train)

validation_accuracy = model1.score(x_val, y_val)
print(f"validation_accuracy1={validation_accuracy:0.4f}")
train_accuracy = model1.score(x_train, y_train)
print(f"train_accuracy1={train_accuracy:0.4f}")

#for k in [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]:
    
    #model2 = sklearn.neighbors.KNeighborsClassifier(
    #n_neighbors=k,
    #weights='uniform',
    #algorithm='ball_tree',
    #)

    #model2.fit(x_train, y_train)

    #validation_accuracy = model2.score(x_val, y_val)
    #print(k, f"validation_accuracy2={validation_accuracy:0.4f}")
    #train_accuracy = model2.score(x_train, y_train)
    #print(k, f"train_accuracy2={train_accuracy:0.4f}")


    
model2 = sklearn.neighbors.KNeighborsClassifier(
    n_neighbors=3,
    weights='uniform',
    algorithm='ball_tree',
    )

model2.fit(x_train, y_train)

validation_accuracy = model2.score(x_val, y_val)
print(f"validation_accuracy2={validation_accuracy:0.4f}")
train_accuracy = model2.score(x_train, y_train)
print(f"train_accuracy2={train_accuracy:0.4f}")

Iteration 1, loss = 0.66130573
Iteration 2, loss = 0.61188741
Iteration 3, loss = 0.59967224
Iteration 4, loss = 0.59970834
Iteration 5, loss = 0.58598355
Iteration 6, loss = 0.58470275
Iteration 7, loss = 0.58431822
Iteration 8, loss = 0.57812529
Iteration 9, loss = 0.57684737
Iteration 10, loss = 0.57532449
Iteration 11, loss = 0.57124050
Iteration 12, loss = 0.56744683
Iteration 13, loss = 0.56824385
Iteration 14, loss = 0.56547902
Iteration 15, loss = 0.56775861
Iteration 16, loss = 0.56859325
Iteration 17, loss = 0.57774994
Iteration 18, loss = 0.57092781
Iteration 19, loss = 0.57455830
Iteration 20, loss = 0.57688437
Iteration 21, loss = 0.56533942
Iteration 22, loss = 0.56660418
Iteration 23, loss = 0.57340014
Iteration 24, loss = 0.56420163
Iteration 25, loss = 0.56085250
Iteration 26, loss = 0.56850726
Iteration 27, loss = 0.57682035
Iteration 28, loss = 0.56460266
Iteration 29, loss = 0.55583423
Iteration 30, loss = 0.55771337
Iteration 31, loss = 0.55367733
Iteration 32, los

In [10]:
if True:
    test_accuracy = model2.score(x_test, y_test)
    print(f"test_accuracy={test_accuracy}")

    y_true = y_test.values.ravel()
    probs_test = model2.predict_proba(x_test)[:, 1]
    preds_test = model2.predict(x_test)

    test_accuracy = model1.score(x_test, y_test)
    print(f"test_accuracy={test_accuracy}")

    y_true = y_test.values.ravel()
    probs_test = model1.predict_proba(x_test)[:, 1]
    preds_test = model1.predict(x_test)

test_accuracy=0.5608308605341247
test_accuracy=0.5588526211671613


In [11]:
x_test = test_df.drop(columns=['target','Date','Change %','Close','Open','High','Low'])

x_test_scaled = scaler.transform(x_test)

df_evaluation = test_df.copy()

df_evaluation['prob'] = model2.predict_proba(x_test_scaled)[:, 1]

df_evaluation['enter'] = (df_evaluation['prob'] >= p).astype(int)

df_evaluation['pnl_if_hit']  = profit_taking
df_evaluation['pnl_if_miss'] = 0.0

df_evaluation['pnl'] = np.where(
    df_evaluation['enter'] == 1,
    np.where(df_evaluation['target'] == 1,
             df_evaluation['pnl_if_hit'],
             df_evaluation['pnl_if_miss']),
    0.0
)

In [12]:
if True:
    total_return = df_evaluation["pnl"].sum()
    num_trades = df_evaluation["enter"].sum()
    hit_rate = df_evaluation.loc[df_evaluation["enter"] == 1, "target"].mean()
    avg_return_per_trade = df_evaluation.loc[df_evaluation["enter"] == 1, "pnl"].mean()

    #print("Total return:", total_return)
    print("Number of trades:", num_trades)
    print("Hit rate:", hit_rate)
    print("Avg return per trade:", avg_return_per_trade)

Number of trades: 465
Hit rate: 0.6
Avg return per trade: 0.0012000000000000001


In [14]:
df_evaluation['prob'].describe()

count    1011.000000
mean        0.702605
std         0.332217
min         0.000000
25%         0.333333
50%         0.666667
75%         1.000000
max         1.000000
Name: prob, dtype: float64

In [15]:
#eval NN

x_test = test_df.drop(columns=['target','Date','Change %','Close','Open','High','Low'])

x_test_scaled = scaler.transform(x_test)

df_evaluation = test_df.copy()

df_evaluation['prob'] = model1.predict_proba(x_test_scaled)[:, 1]

df_evaluation['enter'] = (df_evaluation['prob'] >= p).astype(int)

df_evaluation['pnl_if_hit']  = profit_taking
df_evaluation['pnl_if_miss'] = 0.0

df_evaluation['pnl'] = np.where(
    df_evaluation['enter'] == 1,
    np.where(df_evaluation['target'] == 1,
             df_evaluation['pnl_if_hit'],
             df_evaluation['pnl_if_miss']),
    0.0
)

total_return = df_evaluation["pnl"].sum()
num_trades = df_evaluation["enter"].sum()
hit_rate = df_evaluation.loc[df_evaluation["enter"] == 1, "target"].mean()
avg_return_per_trade = df_evaluation.loc[df_evaluation["enter"] == 1, "pnl"].mean()

#print("Total return:", total_return)
print("Number of trades:", num_trades)
print("Hit rate:", hit_rate)
print("Avg return per trade:", avg_return_per_trade)

Number of trades: 73
Hit rate: 0.7671232876712328
Avg return per trade: 0.0015342465753424657
