In [5]:
import MetaTrader5 as mt
import pandas as pd
import numpy as np
import talib
from talipp.indicators import EMA, SMA, Stoch, DPO
from joblib import dump
from datetime import datetime
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    precision_score, confusion_matrix, classification_report,
    recall_score, accuracy_score, f1_score, roc_auc_score
)
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import roll_time_series, impute
from own_functions import label_data
from sklearn.decomposition import PCA

# Hardcoded credentials - as requested
login = 51708234
password = "4bM&wuVJcBTnjV"
server = "ICMarketsEU-Demo"

mt.initialize()
mt.login(login, password, server)

symbol = "EURUSD"
timeframe = mt.TIMEFRAME_D1
start_date = datetime(2010, 1, 1)
end_date = datetime(2023, 12, 31)

# For your risk calculations
StopLoss = 1
TakeProfit = 1
BreakEvenRatio = StopLoss / (StopLoss + TakeProfit)

def add_rolling_features(df, window):
    df['rolling_mean_open'] = df['open'].rolling(window=window).mean()
    df['rolling_std_open']  = df['open'].rolling(window=window).std()
    df['rolling_mean_close'] = df['close'].rolling(window=window).mean()
    df['rolling_std_close']  = df['close'].rolling(window=window).std()
    df['rolling_mean_high']  = df['high'].rolling(window=window).mean()
    df['rolling_std_high']   = df['high'].rolling(window=window).std()
    df['rolling_mean_low']   = df['low'].rolling(window=window).mean()
    df['rolling_std_low']    = df['low'].rolling(window=window).std()
    return df

def add_lag_features(df, lags):
    for lag in lags:
        df[f'open_lag_{lag}']  = df['open'].shift(lag)
        df[f'close_lag_{lag}'] = df['close'].shift(lag)
        df[f'high_lag_{lag}']  = df['high'].shift(lag)
        df[f'low_lag_{lag}']   = df['low'].shift(lag)
    return df

def extract_rolling_features(df, signal, symbol, max_shift=20, min_shift=5):
    # Prepare for TSFresh
    df_melted = df[['time', signal]].copy()
    df_melted["Symbols"] = symbol
    
    df_rolled = roll_time_series(
        df_melted,
        column_id="Symbols",
        column_sort="time",
        max_timeshift=max_shift,
        min_timeshift=min_shift
    )
    X = extract_features(
        df_rolled.drop("Symbols", axis=1),
        column_id="id", 
        column_sort="time", 
        column_value=signal,
        impute_function=impute, 
        show_warnings=False
    )
    # Re-align the index (TSFresh creates a multi-index)
    X = X.set_index(X.index.map(lambda x: x[1]), drop=True)
    X.index.name = "time"
    return X.dropna()



ohlc_data = pd.DataFrame(mt.copy_rates_range(symbol, timeframe, start_date, end_date))
ohlc_data['time'] = pd.to_datetime(ohlc_data['time'], unit='s')
df = ohlc_data[['time', 'open', 'high', 'low', 'close']].copy()

df['EMA_9']  = talib.EMA(df['close'], timeperiod=9)
df['EMA_21'] = talib.EMA(df['close'], timeperiod=21)
df['EMA_50'] = talib.EMA(df['close'], timeperiod=50)

df['RSI_9']  = talib.RSI(df['close'], timeperiod=9)
df['RSI_14'] = talib.RSI(df['close'], timeperiod=14)
df['RSI_21'] = talib.RSI(df['close'], timeperiod=21)

df['WILLR_15']  = talib.WILLR(df['high'], df['low'], df['close'], timeperiod=15)
df['WILLR_23']  = talib.WILLR(df['high'], df['low'], df['close'], timeperiod=23)
df['WILLR_42']  = talib.WILLR(df['high'], df['low'], df['close'], timeperiod=42)
df['WILLR_145'] = talib.WILLR(df['high'], df['low'], df['close'], timeperiod=145)

df['SAR'] = talib.SAR(df['high'], df['low'], acceleration=0.02, maximum=0.2)

df['BB_upper'], df['BB_middle'], df['BB_lower'] = talib.BBANDS(
    df['close'], timeperiod=20, nbdevup=2, nbdevdn=2, matype=0
)
df['BB_width'] = df['BB_upper'] - df['BB_lower']

df['MACD'], df['MACD_signal'], df['MACD_hist'] = talib.MACD(
    df['close'], fastperiod=12, slowperiod=26, signalperiod=9
)
df['CCI_14'] = talib.CCI(df['high'], df['low'], df['close'], timeperiod=14)

# Example custom rolling features
df = add_rolling_features(df, window=5)

# Drop early NaNs introduced by rolling calculations
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

# ================================
# 3) LABEL INITIALIZATION
# ================================
df['b_flag'] = 0
df['s_flag'] = 0
df.dropna(inplace=True)
df.reset_index(drop=True, inplace=True)

csv_file_path = 'EURUSD_D1_2010to2023.csv'  # Specify your desired path
df.to_csv(csv_file_path, index=False)

# ================================
# 4) LABEL THE DATA (STOPLOSS/TAKEPROFIT)
# ================================
label_data(df, [StopLoss], [TakeProfit], 80, symbol, print_data=False)



# For Open-of-Day approach, we do NOT shift b_flag by -1
df.drop(columns=['s_flag'], inplace=True)

# Optionally add lag features from the same day
df = add_lag_features(df, lags=[1, 2, 3, 4, 5])

# ================================
# 5) TSFRESH EXTRACTION FOR SELECT SIGNALS
# ================================
df['time'] = pd.to_datetime(df['time'])

X1  = extract_rolling_features(df, 'WILLR_15',  symbol)
X2  = extract_rolling_features(df, 'WILLR_42',  symbol)
X3  = extract_rolling_features(df, 'RSI_14',    symbol)
X4  = extract_rolling_features(df, 'MACD_hist', symbol)
X5  = extract_rolling_features(df, 'EMA_9',     symbol)
X6  = extract_rolling_features(df, 'EMA_21',    symbol)
X7  = extract_rolling_features(df, 'EMA_50',    symbol)
X9  = extract_rolling_features(df, 'RSI_9',     symbol)
X10 = extract_rolling_features(df, 'RSI_21',    symbol)
X11 = extract_rolling_features(df, 'WILLR_23',  symbol)
X12 = extract_rolling_features(df, 'WILLR_145', symbol)
X13 = extract_rolling_features(df, 'SAR',       symbol)
X14 = extract_rolling_features(df, 'BB_width',  symbol)
X15 = extract_rolling_features(df, 'MACD_signal', symbol)
X16 = extract_rolling_features(df, 'CCI_14',    symbol)

X_tsfresh = pd.concat(
    [X1, X2, X3, X4, X5, X6, X7, X9, X10, X11, X12, X13, X14, X15, X16],
    axis=1, join='inner'
).dropna()

# ================================
# 6) MERGE TSFRESH FEATURES WITH MAIN DF
# ================================
df = df.set_index(pd.to_datetime(df['time']))
df.drop(columns=['time'], inplace=True)

X = X_tsfresh[X_tsfresh.index.isin(df.index)]
X = pd.concat([df, X], axis=1, join='inner')

# Now X has open, high, low, close, indicators, b_flag, plus TSFresh features.

# ================================
# 7) SHIFT FEATURES FOR OPEN-OF-DAY
# ================================
X_df = X.copy()

target_col = 'b_flag'
all_cols  = [c for c in X_df.columns if c != target_col]
X_df = X_df[all_cols + [target_col]]

# SHIFT all features (except b_flag) by +1 row
feature_cols = all_cols
X_df[feature_cols] = X_df[feature_cols].shift(1)

# Drop rows with NaNs from shifting
X_df.dropna(inplace=True)

# ================================
# 8) TSFRESH FEATURE SELECTION
# (Apply it to the SHIFTED DataFrame)
# ================================
X_df = select_features(X_df, X_df[target_col], fdr_level=0.2)
X_df = X_df[[col for col in X_df if col != target_col] + [target_col]]

# ================================
# 9) CORRELATION FILTER
# ================================
corr_matrix = X_df.corr().abs()
upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
high_corr_features = [column for column in upper_triangle.columns if any(upper_triangle[column] > 0.9)]

# Avoid removing the target_col if it appears in high_corr_features
high_corr_features = [f for f in high_corr_features if f != target_col]

X_df.drop(columns=high_corr_features, inplace=True, errors='ignore')


Mean Candle: 0.009393820578962441


Rolling:   0%|          | 0/40 [00:02<?, ?it/s]


KeyboardInterrupt: 

In [4]:

# ================================
# 10) TRAIN / TEST SPLIT
# ================================
split = int(0.90 * len(X_df))
train_data = X_df.iloc[:split].copy()
test_data  = X_df.iloc[split:].copy()

x_train = train_data.drop(columns=[target_col]).values
y_train = train_data[target_col].values
x_test  = test_data.drop(columns=[target_col]).values
y_test  = test_data[target_col].values

# ================================
# 11) SCALE FEATURES
# ================================
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test  = scaler.transform(x_test)

print("Number of features before PCA:", x_train.shape[1])

# ================================
# 12) PCA
# ================================
pca_components = 15
pca = PCA(n_components=pca_components, svd_solver='randomized', random_state=0)
x_train = pca.fit_transform(x_train)
x_test  = pca.transform(x_test)

print("Number of features after PCA:", x_train.shape[1])

# ================================
# 13) MODEL TRAINING
# ================================
n_estimators = 50
class_weight = {0: 1, 1: 15}
max_features = 'sqrt'
random_state = 42

rf_classifier_mt = RandomForestClassifier(
    n_estimators=n_estimators,
    class_weight=class_weight,
    max_features=max_features,
    random_state=random_state
)

rf_classifier_mt.fit(x_train, y_train)
y_pred_proba = rf_classifier_mt.predict_proba(x_test)[:, 1]

# ================================
# 14) EVALUATION
# ================================
threshold = 0.6  # example threshold
y_pred = (y_pred_proba > threshold).astype(int)

conf_matrix = confusion_matrix(y_test, y_pred)
false_positives = conf_matrix[0][1]
true_positives  = conf_matrix[1][1]

precision = precision_score(y_test, y_pred) if (false_positives + true_positives) > 0 else 0
recall    = recall_score(y_test, y_pred)
f1        = f1_score(y_test, y_pred)
accuracy  = accuracy_score(y_test, y_pred)
roc_auc   = roc_auc_score(y_test, y_pred_proba)
classification_rep = classification_report(y_test, y_pred)

print("Confusion Matrix:")
print(conf_matrix)
print('Accuracy:', round(accuracy, 4))
print('Precision:', round(precision, 4))
print('Recall:', round(recall, 4))
print('F1 Score:', round(f1, 4))
print('ROC-AUC:', round(roc_auc, 4))
print('Classification Report:\n', classification_rep)
print('WIN/LOSS-Diff:', round(100 * (precision - BreakEvenRatio), 2), '%')
print('False Positives:', false_positives)
print('True Positives:', true_positives)
if (false_positives + true_positives) > 0:
    ratio = 100 * true_positives / (false_positives + true_positives)
    print('Ratio total:', round(ratio, 2))
print('BreakEvenRatio:', round(BreakEvenRatio, 2))
print('_______________________________________________________________')


Number of features before PCA: 70
Number of features after PCA: 15
Confusion Matrix:
[[187  29]
 [126  29]]
Accuracy: 0.5822
Precision: 0.5
Recall: 0.1871
F1 Score: 0.2723
ROC-AUC: 0.513
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.87      0.71       216
           1       0.50      0.19      0.27       155

    accuracy                           0.58       371
   macro avg       0.55      0.53      0.49       371
weighted avg       0.56      0.58      0.53       371

WIN/LOSS-Diff: 0.0 %
False Positives: 29
True Positives: 29
Ratio total: 50.0
BreakEvenRatio: 0.5
_______________________________________________________________


In [None]:
import MetaTrader5 as mt
import pandas as pd
import numpy as np
import talib
from talipp.indicators import EMA, SMA, Stoch, DPO
from joblib import dump
from datetime import datetime
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, confusion_matrix, classification_report, recall_score, accuracy_score, f1_score, roc_auc_score
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import roll_time_series, impute
from own_functions import label_data
from sklearn.decomposition import PCA

# Hardcoded credentials - as requested
login = 51708234
password = "4bM&wuVJcBTnjV"
server = "ICMarketsEU-Demo"

mt.initialize()
mt.login(login, password, server)

symbol = "EURUSD"
timeframe = mt.TIMEFRAME_D1
start_date = datetime(2010, 1, 1)
end_date = datetime(2024, 11, 10)
StopLoss = 1
TakeProfit = 1
BreakEvenRatio = StopLoss / (StopLoss + TakeProfit)

def add_rolling_features(df, window):
    df['rolling_mean_open'] = df['open'].rolling(window=window).mean()
    df['rolling_std_open'] = df['open'].rolling(window=window).std()
    df['rolling_mean_close'] = df['close'].rolling(window=window).mean()
    df['rolling_std_close'] = df['close'].rolling(window=window).std()
    df['rolling_mean_high'] = df['high'].rolling(window=window).mean()
    df['rolling_std_high'] = df['high'].rolling(window=window).std()
    df['rolling_mean_low'] = df['low'].rolling(window=window).mean()
    df['rolling_std_low'] = df['low'].rolling(window=window).std()
    return df

def add_lag_features(df, lags):
    for lag in lags:
        df[f'open_lag_{lag}'] = df['open'].shift(lag)
        df[f'close_lag_{lag}'] = df['close'].shift(lag)
        df[f'high_lag_{lag}'] = df['high'].shift(lag)
        df[f'low_lag_{lag}'] = df['low'].shift(lag)
    return df

def extract_rolling_features(df, signal, symbol, max_shift=20, min_shift=5):
    df_melted = df[['time', signal]].copy()
    df_melted["Symbols"] = symbol
    df_rolled = roll_time_series(df_melted, column_id="Symbols", column_sort="time",
                                 max_timeshift=max_shift, min_timeshift=min_shift)
    X = extract_features(df_rolled.drop("Symbols", axis=1),
                         column_id="id", column_sort="time", column_value=signal,
                         impute_function=impute, show_warnings=False)
    X = X.set_index(X.index.map(lambda x: x[1]), drop=True)
    X.index.name = "time"
    return X.dropna()

# Fetch historical data
ohlc_data = pd.DataFrame(mt.copy_rates_range(symbol, timeframe, start_date, end_date))
ohlc_data['time'] = pd.to_datetime(ohlc_data['time'], unit='s')
df = ohlc_data[['time', 'open', 'high', 'low', 'close']].copy()

df['EMA_9'] = talib.EMA(df['close'], timeperiod=9)
df['EMA_21'] = talib.EMA(df['close'], timeperiod=21)
df['EMA_50'] = talib.EMA(df['close'], timeperiod=50)

df['RSI_9'] = talib.RSI(df['close'], timeperiod=9)
df['RSI_14'] = talib.RSI(df['close'], timeperiod=14)
df['RSI_21'] = talib.RSI(df['close'], timeperiod=21)

df['WILLR_15'] = talib.WILLR(df['high'], df['low'], df['close'], timeperiod=15)
df['WILLR_23'] = talib.WILLR(df['high'], df['low'], df['close'], timeperiod=23)
df['WILLR_42'] = talib.WILLR(df['high'], df['low'], df['close'], timeperiod=42)
df['WILLR_145'] = talib.WILLR(df['high'], df['low'], df['close'], timeperiod=145)

df['SAR'] = talib.SAR(df['high'], df['low'], acceleration=0.02, maximum=0.2)

df['BB_upper'], df['BB_middle'], df['BB_lower'] = talib.BBANDS(df['close'], timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
df['BB_width'] = df['BB_upper'] - df['BB_lower']

df['MACD'], df['MACD_signal'], df['MACD_hist'] = talib.MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
df['CCI_14'] = talib.CCI(df['high'], df['low'], df['close'], timeperiod=14)

df = add_rolling_features(df, window=5)


df = df.dropna().reset_index(drop=True)
df['b_flag'] = 0
df['s_flag'] = 0
df = df.dropna().reset_index(drop=True)

#csv_file_path = 'EURUSD_D1_2010to101124.csv'  # Specify your desired path
#df.to_csv(csv_file_path, index=False)

label_data(df, [StopLoss], [TakeProfit], 80, symbol, False)

df['b_flag'] = df['b_flag'].shift(-1)
df = add_lag_features(df, lags=[1, 2, 3, 4, 5])

In [None]:
# Calculate total number of 1s in b_flag and s_flag columns
total_b_flags = df['b_flag'].sum()
total_s_flags = df['s_flag'].sum()

# Total number of rows in the DataFrame
total_rows = len(df)

# Calculate counts in segments of complete 100% data
count_100_b_flags = total_b_flags
count_100_s_flags = total_s_flags

# Calculate counts in intervals of 10%
interval_counts = []
for i in range(0, 101, 10):
    start_idx = int(i / 100 * total_rows)
    end_idx = int((i + 10) / 100 * total_rows)
    
    interval_b_flags = df['b_flag'].iloc[start_idx:end_idx].sum()
    interval_s_flags = df['s_flag'].iloc[start_idx:end_idx].sum()
    
    interval_counts.append((f'{i}% - {i+10}%', interval_b_flags, interval_s_flags))

# Print results
print("Total number of 1s:")
print(f"b_flag: {total_b_flags}")
print(f"s_flag: {total_s_flags}")

print("\nCounts in segments of 100% data:")
print(f"b_flag: {count_100_b_flags}")
print(f"s_flag: {count_100_s_flags}")

print("\nCounts in intervals of 10%:")
for interval, count_b, count_s in interval_counts:
    print(f"{interval}: b_flag={count_b}, s_flag={count_s}")

In [None]:
df.drop(columns=['s_flag'], inplace=True)

X1 = extract_rolling_features(df, 'WILLR_15', symbol)
X2 = extract_rolling_features(df, 'WILLR_42', symbol)
X3 = extract_rolling_features(df, 'RSI_14', symbol)
X4 = extract_rolling_features(df, 'MACD_hist', symbol)
X5 = extract_rolling_features(df, 'EMA_9', symbol)
X6 = extract_rolling_features(df, 'EMA_21', symbol)
X7 = extract_rolling_features(df, 'EMA_50', symbol)
X9 = extract_rolling_features(df, 'RSI_9', symbol)
X10 = extract_rolling_features(df, 'RSI_21', symbol)
X11 = extract_rolling_features(df, 'WILLR_23', symbol)
X12 = extract_rolling_features(df, 'WILLR_145', symbol)
X13 = extract_rolling_features(df, 'SAR', symbol)
X14 = extract_rolling_features(df, 'BB_width', symbol)
X15 = extract_rolling_features(df, 'MACD_signal', symbol)
X16 = extract_rolling_features(df, 'CCI_14', symbol)

# Combine all extracted features
X = pd.concat([X1, X2, X3, X4, X5, X6, X7, X9, X10, X11, X12, X13, X14, X15, X16], axis=1, join='inner').dropna()

df['time'] = pd.to_datetime(df['time'])
df = df.set_index('time')
X = X[X.index.isin(df.index)]
X = pd.concat([df,X], axis=1, join='inner')

X_df = select_features(X, X['b_flag'])
X_df = X_df[[col for col in X_df if col != 'b_flag'] + ['b_flag']]

correlation_matrix = X_df.corr().abs()
upper_triangle = correlation_matrix.where(np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool))
high_correlation_features = [column for column in upper_triangle.columns if any(upper_triangle[column] > 0.9)]
X_df = X_df.drop(columns=high_correlation_features)

#original_index = X_df.index
#X_df = X_df.shift(periods=1, axis=0)
#X_df.index = original_index
X_df = X_df.dropna()


In [None]:
#print(X_df[['b_flag'] + X_df.columns[:5]].head(10)) 

In [None]:
split = int(0.90 * len(X_df))
train_data, test_data = X_df.iloc[:split], X_df.iloc[split:]

x_train = train_data.iloc[:, :-1].values
y_train = train_data['b_flag'].values
x_test = test_data.iloc[:, :-1].values
y_test = test_data['b_flag'].values

sc_mt = StandardScaler()
x_train = sc_mt.fit_transform(x_train)
x_test = sc_mt.transform(x_test)

print("Number of features before PCA:", x_train.shape[1])

# Apply PCA to reduce dimensionality

pca = PCA(n_components=10, svd_solver='randomized', random_state=0)
x_train = pca.fit_transform(x_train)
x_test = pca.transform(x_test)

print("Number of features after PCA:", x_train.shape[1])

n_estimators = 50
class_weight = {0: 15, 1: 15}
max_features = 'sqrt'
random_state = 42

rf_classifier_mt = RandomForestClassifier(
    n_estimators=n_estimators,
    class_weight=class_weight,
    max_features=max_features,
    random_state=random_state
)

rf_classifier_mt.fit(x_train, y_train)
y_pred_proba = rf_classifier_mt.predict_proba(x_test)[:, 1]

# Add probability threshold for predicting class 1
threshold = 0.6  # Adjust as needed to increase precision
y_pred = (y_pred_proba > threshold).astype(int)

conf_matrix = confusion_matrix(y_test, y_pred)
false_positives = conf_matrix[0][1]
true_positives = conf_matrix[1][1]

precision = precision_score(y_test, y_pred) if (false_positives+true_positives) > 0 else 0
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)
classification_rep = classification_report(y_test, y_pred)

print("Confusion Matrix:")
print(conf_matrix)
print('Accuracy:', round(accuracy, 4))
print('Precision:', round(precision, 4))
print('Recall:', round(recall, 4))
print('F1 Score:', round(f1, 4))
print('ROC-AUC:', round(roc_auc, 4))
print('Classification Report:\n', classification_rep)
print('WIN/LOSS-Diff:', round(100 * (precision - BreakEvenRatio), 2), '%')
print('False Positives:', false_positives)
print('True Positives:', true_positives)
if (false_positives + true_positives) > 0:
    print('Ratio total:', round(100 * (true_positives / (false_positives + true_positives)), 2))
print('BreakEvenRatio:', round(BreakEvenRatio, 2))
print('____________________________________________________________________________________________________________________________')


In [None]:
""" import json
feature_names = X_df.columns
with open('AUDUSD_D1_3112buy/feature_names.json', 'w') as f:
    json.dump(list(feature_names), f)
 """/

In [None]:
# Convert index to datetime without 'unit' since the format is already date strings
X_df.index = pd.to_datetime(X_df.index)

# Format datetime to the desired string format
X_df.index = X_df.index.strftime('%Y-%m-%d %H:%M:%S')

# Creating a DataFrame for predictions with the correct index
df_pred = pd.DataFrame(index=X_df.iloc[split:].index)  # No need for split+1
df_pred['prediction'] = y_pred

# Save to CSV
df_pred.to_csv('predEURUSD_D1_3112buy.csv')
