In [3]:
import MetaTrader5 as mt
import pandas as pd
import plotly.express as px
import matplotlib.pylab as plt
import numpy as np
import talib
from talipp.indicators import EMA, SMA, Stoch, DPO
from joblib import dump
from datetime import datetime
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, confusion_matrix, classification_report
from own_functions import *
import os
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import roll_time_series, make_forecasting_frame
from tsfresh.utilities.dataframe_functions import impute

# Initialize MT5 and login
# Credentials are read from the environment so that no secret lives in the notebook:
#   MT5_LOGIN    - numeric account id (required)
#   MT5_PASSWORD - account password (required)
#   MT5_SERVER   - trade server name (optional, defaults to the demo server used so far)
# NOTE(review): the password that used to be hardcoded in this cell should be rotated.
if not mt.initialize():
    raise RuntimeError(f"MetaTrader5 initialize() failed: {mt.last_error()}")

try:
    login = int(os.environ["MT5_LOGIN"])
    password = os.environ["MT5_PASSWORD"]
except KeyError as missing:
    raise RuntimeError(
        f"Environment variable {missing} is not set; export MT5_LOGIN and MT5_PASSWORD before running this cell."
    ) from None
server = os.environ.get("MT5_SERVER", "ICMarketsEU-Demo")

if not mt.login(login, password=password, server=server):
    raise RuntimeError(f"MetaTrader5 login() failed: {mt.last_error()}")

symbol = "GBPUSD"

timeframe = mt.TIMEFRAME_D1

# Load new data (adjust the dates as needed)
rates = mt.copy_rates_range(symbol, timeframe, datetime(2023, 1, 1), datetime(2024, 8, 14))
# copy_rates_range returns None on failure; fail here instead of with a KeyError on 'time' below.
if rates is None or len(rates) == 0:
    raise RuntimeError(f"No rates returned for {symbol}: {mt.last_error()}")

ohlc_data = pd.DataFrame(rates)
# The 'time' column is converted from epoch seconds to pandas timestamps.
ohlc_data['time'] = pd.to_datetime(ohlc_data['time'], unit='s')
df = ohlc_data[['time', 'open', 'high', 'low', 'close']].copy()

# Add rolling mean / standard deviation features for each OHLC price column
def add_rolling_features(df, window):
    """Append rolling mean and standard deviation columns for each OHLC price.

    For every price column (open, close, high, low - in that order) two columns
    are written into ``df``: ``rolling_mean_<price>`` and ``rolling_std_<price>``,
    both computed over ``window`` rows.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns. It is
            modified in place.
        window: Rolling window size passed to ``Series.rolling``.

    Returns:
        The same ``df`` object, with the eight new columns added.
    """
    for price in ('open', 'close', 'high', 'low'):
        windowed = df[price].rolling(window=window)
        df[f'rolling_mean_{price}'] = windowed.mean()
        df[f'rolling_std_{price}'] = windowed.std()
    return df

# Add lagged copies of each OHLC price column
def add_lag_features(df, lags):
    """Append lagged copies of the OHLC price columns.

    For each value in ``lags`` the columns ``open_lag_<lag>``, ``close_lag_<lag>``,
    ``high_lag_<lag>`` and ``low_lag_<lag>`` are written into ``df`` (in that
    order), each being the source column shifted by ``lag`` rows.

    Args:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns. It is
            modified in place.
        lags: Iterable of shift distances passed to ``Series.shift``.

    Returns:
        The same ``df`` object, with the lag columns added.
    """
    price_columns = ('open', 'close', 'high', 'low')
    for lag in lags:
        for price in price_columns:
            df[f'{price}_lag_{lag}'] = df[price].shift(lag)
    return df


# Apply technical indicators: Williams %R over four look-back periods
for willr_period in (15, 23, 42, 145):
    df[f'WILLR_{willr_period}'] = talib.WILLR(df['high'], df['low'], df['close'], timeperiod=willr_period)

# Apply rolling and lag features (both helpers write into df and return it)
df = add_rolling_features(df, window=5)
df = add_lag_features(df, lags=[1, 2, 3, 4, 5])

# Drop the rows left incomplete by the indicator warm-up, rolling windows and lags
df = df.dropna()
df = df.reset_index(drop=True)

# Label columns start at zero before labelling
for flag_column in ('b_flag', 's_flag'):
    df[flag_column] = 0

StopLoss = TakeProfit = 1
BreakEvenRatio = StopLoss / (StopLoss + TakeProfit)
# label_data comes from own_functions; its return value is not used here.
# NOTE(review): presumably it fills b_flag / s_flag in place - confirm in own_functions.
label_data(df, [StopLoss], [TakeProfit], 80, symbol, False)

# Only the b_flag label is used from here on
df = df.drop(columns=['s_flag'])

def extract_rolled_features(frame, value_column, series_id, max_timeshift=20, min_timeshift=5):
    """Roll one indicator column into windows and extract tsfresh features per window.

    Args:
        frame: DataFrame holding a 'time' column and ``value_column``.
        value_column: Name of the column whose windows are featurised.
        series_id: Value written to the temporary "Symbols" id column that
            ``roll_time_series`` groups by.
        max_timeshift: Passed through to ``roll_time_series``.
        min_timeshift: Passed through to ``roll_time_series``.

    Returns:
        DataFrame of tsfresh features indexed by "time". The index is the
        second element of each rolled window id.
    """
    series = frame[['time', value_column]].copy()
    series["Symbols"] = series_id
    rolled = roll_time_series(
        series,
        column_id="Symbols",
        column_sort="time",
        max_timeshift=max_timeshift,
        min_timeshift=min_timeshift,
    )
    # NOTE(review): impute_function=impute fills non-computable features (e.g. high-order
    # FFT coefficients on windows shorter than max_timeshift) with column-wide statistics,
    # which shows up as constant values in the earliest rows - confirm this matches training.
    features = extract_features(
        rolled.drop("Symbols", axis=1),
        column_id="id",
        column_sort="time",
        column_value=value_column,
        impute_function=impute,
        show_warnings=False,
    )
    # Keep only the second element of each window id as the row index.
    features = features.set_index(features.index.map(lambda window_id: window_id[1]), drop=True)
    features.index.name = "time"
    return features


# Extract features using tsfresh for WILLR_15
X1 = extract_rolled_features(df, 'WILLR_15', symbol)

# Extract features using tsfresh for WILLR_42
X2 = extract_rolled_features(df, 'WILLR_42', symbol)

# Combine features from WILLR_15 and WILLR_42
X_new = pd.concat([X1, X2], axis=1, join='inner')
X_new = X_new.dropna()

# Align and merge with original dataframe based on the time index
df['time'] = pd.to_datetime(df['time'])
df = df.set_index('time')
df = df[df.index.isin(X_new.index)]
X_new = pd.concat([X_new, df], axis=1, join='inner')

# List of selected feature names from the training phase.
# Most entries are tsfresh feature names derived from WILLR_15 / WILLR_42; 'WILLR_15',
# 'WILLR_42' and 'WILLR_145' are the raw indicator columns.
# NOTE: the final entry, 'b_flag', is not a feature. The evaluation cell uses it as the
# label and removes it from the feature matrix before scaling.
selected_feature_names_X = ['WILLR_42__mean_second_derivative_central', 'WILLR_15__mean_second_derivative_central', 'WILLR_15', 'WILLR_15__agg_linear_trend__attr_"slope"__chunk_len_10__f_agg_"max"', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_7', 'WILLR_15__fft_coefficient__attr_"real"__coeff_10', 'WILLR_42__fft_coefficient__attr_"imag"__coeff_7', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_6', 'WILLR_15__fft_coefficient__attr_"real"__coeff_9', 'WILLR_42__fft_coefficient__attr_"real"__coeff_10', 'WILLR_15__change_quantiles__f_agg_"mean"__isabs_False__qh_1.0__ql_0.0', 'WILLR_42__fft_coefficient__attr_"imag"__coeff_6', 'WILLR_42', 'WILLR_42__agg_linear_trend__attr_"rvalue"__chunk_len_10__f_agg_"mean"', 'WILLR_15__agg_linear_trend__attr_"rvalue"__chunk_len_10__f_agg_"max"', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_5', 'WILLR_15__energy_ratio_by_chunks__num_segments_10__segment_focus_9', 'WILLR_42__fft_coefficient__attr_"real"__coeff_9', 'WILLR_15__agg_linear_trend__attr_"stderr"__chunk_len_10__f_agg_"min"', 'WILLR_15__fft_coefficient__attr_"angle"__coeff_7', 'WILLR_42__agg_linear_trend__attr_"stderr"__chunk_len_10__f_agg_"min"', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_8', 'WILLR_42__fft_coefficient__attr_"imag"__coeff_8', 'WILLR_42__agg_linear_trend__attr_"slope"__chunk_len_10__f_agg_"max"', 'WILLR_15__fft_coefficient__attr_"angle"__coeff_6', 'WILLR_42__fft_coefficient__attr_"angle"__coeff_7', 'WILLR_15__agg_linear_trend__attr_"rvalue"__chunk_len_10__f_agg_"min"', 'WILLR_15__index_mass_quantile__q_0.9', 'WILLR_15__fft_coefficient__attr_"real"__coeff_8', 'WILLR_15__agg_linear_trend__attr_"stderr"__chunk_len_10__f_agg_"max"', 'WILLR_42__fft_coefficient__attr_"imag"__coeff_5', 'WILLR_145', 'WILLR_42__agg_linear_trend__attr_"rvalue"__chunk_len_10__f_agg_"max"', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_4', 'WILLR_42__fft_coefficient__attr_"real"__coeff_8', 'WILLR_42__agg_linear_trend__attr_"stderr"__chunk_len_10__f_agg_"max"', 
'WILLR_42__fft_coefficient__attr_"angle"__coeff_6', 'WILLR_15__change_quantiles__f_agg_"mean"__isabs_False__qh_0.8__ql_0.0', 'WILLR_42__fft_coefficient__attr_"imag"__coeff_9', 'WILLR_42__fft_coefficient__attr_"angle"__coeff_8', 'WILLR_42__energy_ratio_by_chunks__num_segments_10__segment_focus_9', 'WILLR_15__change_quantiles__f_agg_"mean"__isabs_False__qh_1.0__ql_0.2', 'WILLR_42__agg_linear_trend__attr_"rvalue"__chunk_len_10__f_agg_"min"', 'WILLR_42__change_quantiles__f_agg_"mean"__isabs_False__qh_0.6__ql_0.0', 'WILLR_42__fft_coefficient__attr_"imag"__coeff_4', 'WILLR_15__number_peaks__n_1', 'WILLR_15__change_quantiles__f_agg_"mean"__isabs_False__qh_0.6__ql_0.0', 'WILLR_15__agg_linear_trend__attr_"stderr"__chunk_len_5__f_agg_"min"', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_9', 'WILLR_15__fft_coefficient__attr_"angle"__coeff_5', 'WILLR_15__fft_coefficient__attr_"angle"__coeff_8', 'WILLR_42__index_mass_quantile__q_0.9', 'WILLR_42__fft_coefficient__attr_"angle"__coeff_9', 'WILLR_15__fft_coefficient__attr_"real"__coeff_7', 'WILLR_15__change_quantiles__f_agg_"mean"__isabs_False__qh_1.0__ql_0.4', 'WILLR_15__change_quantiles__f_agg_"mean"__isabs_False__qh_0.4__ql_0.0', 'WILLR_15__agg_linear_trend__attr_"intercept"__chunk_len_10__f_agg_"max"', 'WILLR_15__time_reversal_asymmetry_statistic__lag_1', 'WILLR_42__binned_entropy__max_bins_10', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_3', 'WILLR_15__fft_coefficient__attr_"angle"__coeff_4', 'WILLR_42__fft_coefficient__attr_"angle"__coeff_5', 'WILLR_15__index_mass_quantile__q_0.8', 'WILLR_15__agg_linear_trend__attr_"intercept"__chunk_len_10__f_agg_"min"', 'WILLR_42__change_quantiles__f_agg_"mean"__isabs_False__qh_1.0__ql_0.2', 'WILLR_15__agg_linear_trend__attr_"stderr"__chunk_len_5__f_agg_"max"', 'WILLR_15__fft_coefficient__attr_"angle"__coeff_9', 'WILLR_42__number_cwt_peaks__n_1', 'WILLR_15__fft_coefficient__attr_"real"__coeff_3', 'WILLR_42__fft_coefficient__attr_"real"__coeff_1', 'WILLR_42__number_peaks__n_1', 
'WILLR_15__fft_coefficient__attr_"real"__coeff_1', 'WILLR_42__approximate_entropy__m_2__r_0.1', 'WILLR_42__fft_coefficient__attr_"real"__coeff_3', 'WILLR_42__agg_linear_trend__attr_"stderr"__chunk_len_5__f_agg_"max"', 'WILLR_42__fft_coefficient__attr_"angle"__coeff_4', 'WILLR_42__agg_linear_trend__attr_"stderr"__chunk_len_5__f_agg_"min"', 'WILLR_42__fft_coefficient__attr_"real"__coeff_7', 'WILLR_15__symmetry_looking__r_0.2', 'WILLR_15__number_cwt_peaks__n_1', 'WILLR_42__energy_ratio_by_chunks__num_segments_10__segment_focus_5', 'WILLR_42__change_quantiles__f_agg_"mean"__isabs_False__qh_0.8__ql_0.0', 'WILLR_42__change_quantiles__f_agg_"mean"__isabs_False__qh_1.0__ql_0.4', 'WILLR_42__energy_ratio_by_chunks__num_segments_10__segment_focus_4', 'WILLR_15__change_quantiles__f_agg_"mean"__isabs_False__qh_0.2__ql_0.0', 'WILLR_15__ratio_beyond_r_sigma__r_3', 'WILLR_42__change_quantiles__f_agg_"mean"__isabs_False__qh_0.4__ql_0.0', 'WILLR_15__energy_ratio_by_chunks__num_segments_10__segment_focus_4', 'WILLR_42__first_location_of_maximum', 'WILLR_15__longest_strike_below_mean', 'WILLR_42__fft_coefficient__attr_"real"__coeff_4', 'WILLR_42__fft_coefficient__attr_"imag"__coeff_3', 'WILLR_15__fft_coefficient__attr_"angle"__coeff_3', 'WILLR_15__change_quantiles__f_agg_"mean"__isabs_False__qh_1.0__ql_0.6', 'WILLR_15__first_location_of_maximum', 'WILLR_42__energy_ratio_by_chunks__num_segments_10__segment_focus_6', 'WILLR_42__change_quantiles__f_agg_"mean"__isabs_False__qh_0.2__ql_0.0', 'WILLR_42__change_quantiles__f_agg_"mean"__isabs_False__qh_0.6__ql_0.2', 'WILLR_15__fft_coefficient__attr_"real"__coeff_2', 'WILLR_42__fft_coefficient__attr_"real"__coeff_2', 'WILLR_15__fft_coefficient__attr_"real"__coeff_4', 'WILLR_15__energy_ratio_by_chunks__num_segments_10__segment_focus_5', 'WILLR_42__change_quantiles__f_agg_"mean"__isabs_False__qh_1.0__ql_0.8', 'WILLR_42__index_mass_quantile__q_0.8', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_2', 'WILLR_42__lempel_ziv_complexity__bins_10', 
'WILLR_42__fft_coefficient__attr_"angle"__coeff_3','b_flag']

# Select the same features
# Fail with a short, explicit message if the freshly extracted frame lacks any column
# the model was trained on.
missing_features = [name for name in selected_feature_names_X if name not in X_new.columns]
if missing_features:
    raise KeyError(f"Features missing from X_new: {missing_features}")

X_new_selected = X_new.loc[:, selected_feature_names_X]

# 'b_flag' is the label, not a feature; it must stay the final column so that features
# and label can be separated unambiguously downstream.
assert X_new_selected.columns[-1] == 'b_flag', "label column 'b_flag' must be last"

X_df = X_new_selected

X_df

Mean Candle: 0.008161159420289851


Rolling: 100%|██████████| 28/28 [00:09<00:00,  3.07it/s]
Feature Extraction: 100%|██████████| 28/28 [00:13<00:00,  2.07it/s]
Rolling: 100%|██████████| 28/28 [00:07<00:00,  3.78it/s]
Feature Extraction: 100%|██████████| 28/28 [00:12<00:00,  2.17it/s]


Unnamed: 0_level_0,WILLR_42__mean_second_derivative_central,WILLR_15__mean_second_derivative_central,WILLR_15,"WILLR_15__agg_linear_trend__attr_""slope""__chunk_len_10__f_agg_""max""","WILLR_15__fft_coefficient__attr_""imag""__coeff_7","WILLR_15__fft_coefficient__attr_""real""__coeff_10","WILLR_42__fft_coefficient__attr_""imag""__coeff_7","WILLR_15__fft_coefficient__attr_""imag""__coeff_6","WILLR_15__fft_coefficient__attr_""real""__coeff_9","WILLR_42__fft_coefficient__attr_""real""__coeff_10",...,"WILLR_15__fft_coefficient__attr_""real""__coeff_2","WILLR_42__fft_coefficient__attr_""real""__coeff_2","WILLR_15__fft_coefficient__attr_""real""__coeff_4",WILLR_15__energy_ratio_by_chunks__num_segments_10__segment_focus_5,"WILLR_42__change_quantiles__f_agg_""mean""__isabs_False__qh_1.0__ql_0.8",WILLR_42__index_mass_quantile__q_0.8,"WILLR_15__fft_coefficient__attr_""imag""__coeff_2",WILLR_42__lempel_ziv_complexity__bins_10,"WILLR_42__fft_coefficient__attr_""angle""__coeff_3",b_flag
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-07-28,1.120608,1.970835,-75.478927,-14.025987,-3.160805,-3.666646,-1.585308,-1.964471,-4.860958,-0.377216,...,36.179312,15.406422,-0.315426,0.225132,4.834356,0.833333,21.331806,0.833333,180.000000,1
2023-07-31,0.196074,0.240156,-81.017664,-14.025987,-3.160805,-3.666646,-1.585308,-1.964471,-4.860958,-0.377216,...,8.630465,0.904620,-0.315426,0.178763,4.834356,0.857143,29.535808,0.857143,-55.445215,0
2023-08-01,-0.341753,-0.194719,-91.294587,-14.025987,-3.160805,-3.666646,-1.585308,-1.964471,-4.860958,-0.377216,...,-11.152810,-9.363225,1.977432,0.141704,4.834356,0.875000,9.403217,0.875000,-12.812913,0
2023-08-02,-0.483757,0.406378,-93.545592,-14.025987,-3.160805,-3.666646,-1.585308,-1.964471,-4.860958,-0.377216,...,-13.960390,-9.946586,16.927339,0.116374,4.834356,0.888889,-10.803578,0.888889,7.223348,0
2023-08-03,0.188101,1.149510,-83.093732,-14.025987,-3.160805,-3.666646,-1.585308,-1.964471,-4.860958,-0.377216,...,-7.450501,-0.041321,33.504893,0.101989,4.834356,0.900000,-12.937745,0.800000,37.253700,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-08-07,0.166602,0.288941,-94.210990,-46.935860,5.344205,74.101885,6.677670,-30.130254,34.172842,61.122383,...,25.842080,5.145971,18.408746,0.127805,-2.580810,0.952381,-124.390756,0.619048,-80.217718,0
2024-08-08,0.103474,0.351620,-70.924956,-35.105241,-16.894862,-2.892131,-11.408440,-3.009975,46.165824,4.658090,...,33.102906,40.843398,33.294145,0.122459,0.325131,0.904762,-127.981437,0.666667,-67.722461,0
2024-08-09,0.226551,0.283849,-66.546503,-32.916014,-36.866722,57.486590,-44.346734,-23.036035,3.595886,50.741994,...,53.252410,80.563663,56.079044,0.136443,-5.706460,0.904762,-118.589552,0.666667,-49.371995,0
2024-08-12,0.007989,0.050161,-63.252570,-28.759042,-0.397995,6.950276,0.311023,-47.708160,59.607063,14.967560,...,59.131680,96.059110,40.741647,0.149007,-5.472677,0.904762,-103.214099,0.714286,-39.716807,0


In [4]:
from joblib import load
from sklearn.metrics import confusion_matrix, precision_score, accuracy_score, recall_score, f1_score, roc_auc_score

# NOTE(review): joblib.load unpickles arbitrary Python objects - only load artifacts
# that come from a trusted source.
scaler = load('scaler_fold_4.joblib')
model = load('model_fold_4.joblib')

# Features and label are both derived from X_new_selected, and X_df is no longer
# overwritten with a NumPy array, so this cell can be re-run without restarting.
LABEL_COLUMN = 'b_flag'
X_features = X_new_selected.drop(columns=[LABEL_COLUMN]).values  # These are your features
y_test = X_new_selected[LABEL_COLUMN].values  # Binary label

X_test = scaler.transform(X_features)  # Apply scaler only on features, not on labels
y_test_pred = model.predict(X_test)

print("Confusion Matrix (Testing Data):")
# labels=[0, 1] keeps the matrix 2x2 even if only one class occurs, so the
# [0][1] / [1][1] lookups below cannot go out of range.
cm_test = confusion_matrix(y_test, y_test_pred, labels=[0, 1])
print(cm_test)

test_false_positives = cm_test[0][1]
test_true_positives = cm_test[1][1]
predicted_positives = test_false_positives + test_true_positives

# zero_division=0 reports 0 instead of warning when a denominator is empty
# (e.g. the model predicts no positives at all).
test_precision = precision_score(y_test, y_test_pred, zero_division=0)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred, zero_division=0)
test_f1 = f1_score(y_test, y_test_pred, zero_division=0)

# Share of predicted positives that were correct, in percent; NaN when nothing
# was predicted positive.
if predicted_positives:
    ratio_total = round(100 * (test_true_positives / predicted_positives), 2)
else:
    ratio_total = float('nan')

print('Testing Data Results:')
print('WIN/LOSS-Diff:', round(100 * (test_precision - BreakEvenRatio), 2), '%')
print('False Positives:', test_false_positives)
print('True Positives:', test_true_positives)
print('Precision:', test_precision)
print('Accuracy:', test_accuracy)
print('Recall:', test_recall)
print('F1 Score:', test_f1)
print('Ratio Total:', ratio_total)
print('BreakEvenRatio:', round(BreakEvenRatio, 2))


# Persist one prediction per timestamp
df_pred = pd.DataFrame(index=X_new_selected.index)
df_pred['prediction'] = y_test_pred
df_pred.to_csv('GBPUSD_D1_3112_Buy.csv')



Confusion Matrix (Testing Data):
[[86 95]
 [ 9 81]]
Testing Data Results:
WIN/LOSS-Diff: -3.98 %
False Positives: 95
True Positives: 81
Precision: 0.4602272727272727
Accuracy: 0.6162361623616236
Recall: 0.9
F1 Score: 0.6090225563909774
Ratio Total: 46.02
BreakEvenRatio: 0.5
