In [1]:
import MetaTrader5 as mt
import pandas as pd
import matplotlib.pylab as plt
import numpy as np
import talib
from talipp.indicators import EMA, SMA, Stoch, DPO
from joblib import dump
from datetime import datetime
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, confusion_matrix, classification_report
from own_functions import *
import os
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import roll_time_series, make_forecasting_frame
from tsfresh.utilities.dataframe_functions import impute

# Connect to the MetaTrader 5 terminal and download the daily GBPUSD history.
# The account credentials are read from environment variables (MT5_LOGIN,
# MT5_PASSWORD, optional MT5_SERVER) so that no secret is stored in the notebook.
if not mt.initialize():
    raise RuntimeError(f"MetaTrader5 initialize() failed: {mt.last_error()}")

try:
    login = int(os.environ["MT5_LOGIN"])
    password = os.environ["MT5_PASSWORD"]
except KeyError as missing_var:
    raise RuntimeError(
        f"Environment variable {missing_var.args[0]} must be set before running this notebook"
    ) from missing_var
server = os.environ.get("MT5_SERVER", "ICMarketsEU-Demo")

# login() reports failure through its return value, so check it explicitly.
if not mt.login(login, password, server):
    raise RuntimeError(f"MetaTrader5 login failed: {mt.last_error()}")

symbol = "GBPUSD"
timeframe = mt.TIMEFRAME_D1
rates = mt.copy_rates_range(symbol, timeframe, datetime(2000, 1, 1), datetime(2023, 12, 31))
# Fail here with a clear message instead of a KeyError on 'time' further down.
if rates is None or len(rates) == 0:
    raise RuntimeError(f"No rates returned for {symbol}: {mt.last_error()}")

ohlc_data = pd.DataFrame(rates)
# The 'time' column is converted from epoch seconds to timestamps.
ohlc_data['time'] = pd.to_datetime(ohlc_data['time'], unit='s')
df = ohlc_data[['time', 'open', 'high', 'low', 'close']].copy()


def add_rolling_features(df, window):
    """Add rolling mean and standard deviation columns for each price column.

    For each of 'open', 'close', 'high' and 'low' (in that order) the columns
    'rolling_mean_<name>' and 'rolling_std_<name>' are written into ``df``.

    Parameters:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns; it is
            modified in place.
        window: window argument forwarded to ``Series.rolling``.

    Returns:
        The same DataFrame object that was passed in.
    """
    for price in ('open', 'close', 'high', 'low'):
        windowed = df[price].rolling(window=window)
        df[f'rolling_mean_{price}'] = windowed.mean()
        df[f'rolling_std_{price}'] = windowed.std()
    return df

# Lagged copies of the price columns
def add_lag_features(df, lags):
    """Add shifted copies of the price columns for every requested lag.

    For each lag, and for each of 'open', 'close', 'high' and 'low' (in that
    order), a column '<name>_lag_<lag>' holding ``df[name].shift(lag)`` is
    written into ``df``.

    Parameters:
        df: DataFrame with 'open', 'high', 'low' and 'close' columns; it is
            modified in place.
        lags: iterable of shift amounts.

    Returns:
        The same DataFrame object that was passed in.
    """
    for lag in lags:
        for price in ('open', 'close', 'high', 'low'):
            df[f'{price}_lag_{lag}'] = df[price].shift(lag)
    return df

# Indicators
# talib.WILLR evaluated on high/low/close for several look-back periods
for willr_period in (15, 23, 42, 145):
    df[f'WILLR_{willr_period}'] = talib.WILLR(
        df['high'], df['low'], df['close'], timeperiod=willr_period
    )

# Rolling statistics (window of 5) followed by lags 1..5 of the price columns
df = add_lag_features(add_rolling_features(df, window=5), lags=list(range(1, 6)))

# Remove rows containing NaN and renumber the index from 0
df = df.dropna().reset_index(drop=True)

# Buy & Sell Flags, initialised to 0 for every row
df = df.assign(b_flag=0, s_flag=0)

# Dropping NaN values and resetting index
df = df.dropna().reset_index(drop=True)

""" csv_file_path = 'ICMT5EURUSD2000_043024_D1.csv'  # Specify your desired path
df.to_csv(csv_file_path, index=False) """

# Stop-loss / take-profit settings handed to label_data
StopLoss, TakeProfit = 1, 1
BreakEvenRatio = StopLoss / (StopLoss + TakeProfit)
# label_data comes from own_functions; its return value is not used here.
# NOTE(review): presumably it fills b_flag / s_flag on df in place - confirm.
label_data(df, [StopLoss], [TakeProfit], 80, symbol, False)



Mean Candle: 0.01323269578065998


In [2]:
# Calculate total number of 1s in b_flag and s_flag columns
total_b_flags = df['b_flag'].sum()
total_s_flags = df['s_flag'].sum()

# Total number of rows in the DataFrame
total_rows = len(df)

# Counts over the complete data set (identical to the totals above)
count_100_b_flags = total_b_flags
count_100_s_flags = total_s_flags

# Calculate counts in intervals of 10%.
# The loop stops at the 90%-100% bucket: starting a bucket at 100% would only
# produce an empty "100% - 110%" slice past the end of the data.
interval_counts = []
for i in range(0, 100, 10):
    # Integer arithmetic keeps the bucket boundaries exact and contiguous.
    start_idx = i * total_rows // 100
    end_idx = (i + 10) * total_rows // 100

    interval_b_flags = df['b_flag'].iloc[start_idx:end_idx].sum()
    interval_s_flags = df['s_flag'].iloc[start_idx:end_idx].sum()

    interval_counts.append((f'{i}% - {i+10}%', interval_b_flags, interval_s_flags))

# Print results
print("Total number of 1s:")
print(f"b_flag: {total_b_flags}")
print(f"s_flag: {total_s_flags}")

print("\nCounts in segments of 100% data:")
print(f"b_flag: {count_100_b_flags}")
print(f"s_flag: {count_100_s_flags}")

print("\nCounts in intervals of 10%:")
for interval, count_b, count_s in interval_counts:
    print(f"{interval}: b_flag={count_b}, s_flag={count_s}")

Total number of 1s:
b_flag: 2985
s_flag: 2973

Counts in segments of 100% data:
b_flag: 2985
s_flag: 2973

Counts in intervals of 10%:
0% - 10%: b_flag=307, s_flag=301
10% - 20%: b_flag=350, s_flag=256
20% - 30%: b_flag=307, s_flag=298
30% - 40%: b_flag=282, s_flag=296
40% - 50%: b_flag=289, s_flag=317
50% - 60%: b_flag=315, s_flag=292
60% - 70%: b_flag=269, s_flag=336
70% - 80%: b_flag=322, s_flag=286
80% - 90%: b_flag=322, s_flag=286
90% - 100%: b_flag=222, s_flag=305
100% - 110%: b_flag=0, s_flag=0


In [3]:
# Feature extraction
# Only the buy flag is modelled in this notebook, so the sell flag is removed.
df = df.drop(columns=['s_flag'])


def _extract_rolled_features(frame, signal, series_id, max_timeshift=20, min_timeshift=5):
    """Build tsfresh features from rolling windows of a single signal column.

    Parameters:
        frame: DataFrame holding a 'time' column and the ``signal`` column.
        signal: name of the column the features are extracted from.
        series_id: value written into the "Symbols" column, which is used as
            ``column_id`` when rolling.
        max_timeshift, min_timeshift: forwarded to ``roll_time_series``.

    Returns:
        DataFrame of extracted features whose index is named "time" and is
        built from the second element of each rolled id; rows containing NaN
        are dropped.
    """
    melted = frame[['time', signal]].copy()
    melted["Symbols"] = series_id

    rolled = roll_time_series(melted, column_id="Symbols", column_sort="time",
                              max_timeshift=max_timeshift, min_timeshift=min_timeshift)

    features = extract_features(rolled.drop("Symbols", axis=1),
                                column_id="id", column_sort="time", column_value=signal,
                                impute_function=impute, show_warnings=False)

    # Each index entry is indexable; its second element becomes the new index.
    features = features.set_index(features.index.map(lambda x: x[1]), drop=True)
    features.index.name = "time"
    return features.dropna()


selected_signal_1 = 'WILLR_15'
X1 = _extract_rolled_features(df, selected_signal_1, symbol)

selected_signal_2 = 'WILLR_42'
X2 = _extract_rolled_features(df, selected_signal_2, symbol)

# Keep only the timestamps for which both feature sets are available
X = pd.concat([X1, X2], axis=1, join='inner')
X = X.dropna()

# Align indices: index df by time and keep only rows that have extracted features
df['time'] = pd.to_datetime(df['time'])
df = df.set_index('time')
df = df[df.index.isin(X.index)]

X = pd.concat([X, df], axis=1, join='inner')

# Keep the target out of the candidate features: it must not take part in its
# own relevance test, and the correlation filter below must never drop it.
# NOTE(review): selection and the correlation filter see every row, including
# rows that the later cross-validation cells use as test folds - confirm that
# this is acceptable for the reported test metrics.
target = X['b_flag']
X_df = select_features(X.drop(columns=['b_flag']), target)

# Drop every feature whose absolute correlation with an earlier column exceeds 0.8
correlation_matrix = X_df.corr().abs()
upper_triangle = correlation_matrix.where(np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool))
high_correlation_features = [column for column in upper_triangle.columns if (upper_triangle[column] > 0.8).any()]
X_df = X_df.drop(columns=high_correlation_features)

# Ensure b_flag is at the end after feature selection (later cells take the
# last column as the label)
X_df = X_df.assign(b_flag=target)


# Shift every column down by one row; the index itself is left unchanged.
# NOTE(review): b_flag is shifted together with the features, so each row still
# pairs a label with the features of the same original row - only the
# timestamps move. Confirm whether only the features were meant to be lagged.
original_index = X_df.index
shifted_X_df = X_df.shift(periods=1,axis=0)
shifted_X_df.index = original_index

# The first row is all-NaN after the shift and is removed here
X_df = shifted_X_df.dropna()


# Get the list of selected feature names (the trailing 'b_flag' is included)
selected_feature_names_X = list(X_df.columns)

print(selected_feature_names_X )

Rolling: 100%|██████████| 40/40 [00:04<00:00,  8.65it/s]
Feature Extraction: 100%|██████████| 40/40 [01:15<00:00,  1.90s/it]
Rolling: 100%|██████████| 40/40 [00:04<00:00,  8.19it/s]
Feature Extraction: 100%|██████████| 40/40 [01:20<00:00,  2.02s/it]


['WILLR_42__mean_second_derivative_central', 'WILLR_15', 'WILLR_15__agg_linear_trend__attr_"slope"__chunk_len_10__f_agg_"max"', 'WILLR_15__fft_coefficient__attr_"real"__coeff_10', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_7', 'WILLR_15__fft_coefficient__attr_"real"__coeff_9', 'WILLR_42__agg_linear_trend__attr_"stderr"__chunk_len_10__f_agg_"min"', 'WILLR_42__fft_coefficient__attr_"imag"__coeff_8', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_6', 'WILLR_15__agg_linear_trend__attr_"stderr"__chunk_len_10__f_agg_"max"', 'WILLR_42__agg_linear_trend__attr_"rvalue"__chunk_len_10__f_agg_"mean"', 'WILLR_15__fft_coefficient__attr_"imag"__coeff_5', 'WILLR_15__agg_linear_trend__attr_"stderr"__chunk_len_10__f_agg_"min"', 'WILLR_145', 'WILLR_42__agg_linear_trend__attr_"stderr"__chunk_len_10__f_agg_"max"', 'WILLR_15__fft_coefficient__attr_"angle"__coeff_7', 'WILLR_42__fft_coefficient__attr_"angle"__coeff_7', 'WILLR_15__fft_coefficient__attr_"real"__coeff_8', 'WILLR_15__index_mass_quantile__q_0.

In [7]:
import os
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import TimeSeriesSplit
from joblib import dump


# Pick the probability cut-off with the highest F1 score
def adjust_threshold(probas, target):
    """Return the cut-off from np.arange(0.1, 1.0, 0.1) that maximises F1.

    Parameters:
        probas: array of predicted probabilities; compared with ``>=`` against
            each candidate cut-off.
        target: true labels passed to ``f1_score``.

    Returns:
        The first candidate reaching the highest F1 score, or 0.5 when no
        candidate scores above 0.
    """
    chosen_cutoff, top_f1 = 0.5, 0
    for cutoff in np.arange(0.1, 1.0, 0.1):
        candidate_f1 = f1_score(target, (probas >= cutoff).astype(int))
        if not candidate_f1 > top_f1:
            continue
        chosen_cutoff, top_f1 = cutoff, candidate_f1
    return chosen_cutoff

# Output directory for scalers, models and predictions
os.makedirs('GBPUSD_D1_3112_NEWBuy', exist_ok=True)

# Cross-validation splitter (5 splits)
tscv = TimeSeriesSplit(n_splits=5)

# Base estimators of the ensemble
rf_params = {
    'n_estimators': 100,
    'max_depth': 20,
    'min_samples_split': 4,
    'min_samples_leaf': 4,
    'max_features': 'sqrt',
    'random_state': 0,
    'max_leaf_nodes': 10,
    'bootstrap': True,
    'oob_score': True,
    'ccp_alpha': 0.01,
    'class_weight': {0: 10, 1: 15},
}
clf1 = RandomForestClassifier(**rf_params)

gb_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'random_state': 0,
}
clf2 = GradientBoostingClassifier(**gb_params)

clf3 = LogisticRegression(random_state=0)

# Soft-voting ensemble over the three estimators
ensemble_clf = VotingClassifier(
    estimators=list(zip(('rf', 'gb', 'lr'), (clf1, clf2, clf3))),
    voting='soft',
)

# Feature matrix = every column except the last; label = 'b_flag'
X = X_df.iloc[:, :-1].to_numpy()
y = X_df['b_flag'].to_numpy()

# Running sums of the per-fold metrics
overall_precision_sum_train = overall_recall_sum_train = overall_f1_sum_train = 0
overall_precision_sum_test = overall_recall_sum_test = overall_f1_sum_test = 0

# Book-keeping for the best fold (highest test F1)
best_f1_score = -np.inf
best_model_fold = best_model = None
best_test_predictions = best_test_thresholded_predictions = None
best_test_indices = None
best_threshold_value = None  # threshold chosen for the best fold
split_num = 0  # number of folds processed so far


import copy  # stdlib; used to snapshot the fitted ensemble of the best fold

# Cross-validation loop: scale, fit, evaluate and persist one model per fold.
for fold, (train_index, test_index) in enumerate(tscv.split(X_df)):
    print(f"Fold {fold + 1}")

    # Train / test data for this fold
    x_train, y_train = X[train_index], y[train_index]
    x_test, y_test = X[test_index], y[test_index]

    # Scale Data: the scaler is fitted on the training part only
    sc_mt = StandardScaler()
    x_train = sc_mt.fit_transform(x_train)
    x_test = sc_mt.transform(x_test)

    # Save scaler
    dump(sc_mt, f'GBPUSD_D1_3112_NEWBuy/scaler_fold_{fold + 1}.joblib')

    # Train Ensemble Model (the same estimator object is refit on every fold)
    ensemble_clf.fit(x_train, y_train)

    # Save the trained model
    dump(ensemble_clf, f'GBPUSD_D1_3112_NEWBuy/model_fold_{fold + 1}.joblib')

    # Predict on training data
    y_train_pred = ensemble_clf.predict(x_train)

    # Predict on testing data
    y_test_pred = ensemble_clf.predict(x_test)
    y_test_prob = ensemble_clf.predict_proba(x_test)[:, 1]

    # Apply Custom Threshold on Testing Data
    # NOTE(review): the threshold is chosen using y_test itself, so the
    # thresholded predictions are not an out-of-sample estimate.
    custom_threshold = adjust_threshold(y_test_prob, y_test)
    y_test_pred_thresholded = (y_test_prob >= custom_threshold).astype(int)

    # Test F1 is needed both for model tracking and for the aggregates
    fold_f1_test = f1_score(y_test, y_test_pred)

    # Track the best model based on F1 score
    if fold_f1_test > best_f1_score:
        best_f1_score = fold_f1_test
        best_model_fold = fold + 1
        # Snapshot the fitted estimator: ensemble_clf is refit on the next
        # fold, so keeping a plain reference would always end up pointing at
        # the last fold's model instead of the best one.
        best_model = copy.deepcopy(ensemble_clf)
        best_test_predictions = y_test_pred
        best_test_thresholded_predictions = y_test_pred_thresholded
        best_test_indices = test_index
        best_threshold_value = custom_threshold  # Store the best threshold

    # Aggregate metrics across folds
    overall_precision_sum_train += precision_score(y_train, y_train_pred)
    overall_recall_sum_train += recall_score(y_train, y_train_pred)
    overall_f1_sum_train += f1_score(y_train, y_train_pred)

    overall_precision_sum_test += precision_score(y_test, y_test_pred)
    overall_recall_sum_test += recall_score(y_test, y_test_pred)
    overall_f1_sum_test += fold_f1_test

    split_num += 1

# Average the accumulated metrics over the number of processed folds
overall_precision_train, overall_recall_train, overall_f1_train = (
    metric_sum / split_num
    for metric_sum in (overall_precision_sum_train, overall_recall_sum_train, overall_f1_sum_train)
)
overall_precision_test, overall_recall_test, overall_f1_test = (
    metric_sum / split_num
    for metric_sum in (overall_precision_sum_test, overall_recall_sum_test, overall_f1_sum_test)
)

# Persist the model tracked as best and report where it went
best_model_path = f'GBPUSD_D1_3112_NEWBuy/best_model_fold_{best_model_fold}.joblib'
dump(best_model, best_model_path)
print(f"The best model was from fold {best_model_fold} with an F1 score of {best_f1_score:.4f}")
print(f"Best threshold for this model: {best_threshold_value:.2f}")
print(f"Model saved to {best_model_path}")

# Predictions of the best fold, indexed by the positional test indices
df_pred = pd.DataFrame(
    {
        'prediction': best_test_predictions,
        'prediction_thresholded': best_test_thresholded_predictions,
        'actual': y[best_test_indices],
    },
    index=best_test_indices,
)
df_pred.to_csv('GBPUSD_D1_3112_NEWBuy/best_model_predictions.csv', index=True)

# Averaged metrics, training first and then testing
for heading, averaged_scores in (
    ('Overall Training Data Metrics:', (overall_precision_train, overall_recall_train, overall_f1_train)),
    ('Overall Testing Data Metrics:', (overall_precision_test, overall_recall_test, overall_f1_test)),
):
    print(heading)
    for metric_label, metric_value in zip(('Precision:', 'Recall:', 'F1 Score:'), averaged_scores):
        print(metric_label, metric_value)


Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
The best model was from fold 1 with an F1 score of 0.7352
Best threshold for this model: 0.50
Model saved to GBPUSD_D1_3112_NEWBuy/best_model_fold_1.joblib
Overall Training Data Metrics:
Precision: 0.7786304412783016
Recall: 0.8638897078851212
F1 Score: 0.8189964758506472
Overall Testing Data Metrics:
Precision: 0.6451314612669227
Recall: 0.7634328943071673
F1 Score: 0.6981876456146756


In [6]:
import json

# Write the column names of X_df to a JSON file next to the saved models.
# NOTE(review): X_df still carries the 'b_flag' label column, so that name is
# written as well - confirm that consumers of this file expect it.
feature_names = X_df.columns
with open('GBPUSD_D1_3112_NEWBuy/feature_names.json', 'w') as f:
    f.write(json.dumps(feature_names.tolist()))


In [6]:
import os
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import TimeSeriesSplit
from joblib import dump

# Pick the probability cut-off with the highest F1 score
def adjust_threshold(probas, target):
    """Return the cut-off from np.arange(0.1, 1.0, 0.1) that maximises F1.

    Parameters:
        probas: array of predicted probabilities; compared with ``>=`` against
            each candidate cut-off.
        target: true labels passed to ``f1_score``.

    Returns:
        The first candidate reaching the highest F1 score, or 0.5 when no
        candidate scores above 0.
    """
    chosen_cutoff, top_f1 = 0.5, 0
    for cutoff in np.arange(0.1, 1.0, 0.1):
        candidate_f1 = f1_score(target, (probas >= cutoff).astype(int))
        if not candidate_f1 > top_f1:
            continue
        chosen_cutoff, top_f1 = cutoff, candidate_f1
    return chosen_cutoff

# Setup directories
# This must be the directory the rest of this cell writes to
# ('GBPUSD_D1_3112_BuyNEW/...'); otherwise the first dump() fails on a fresh run.
os.makedirs('GBPUSD_D1_3112_BuyNEW', exist_ok=True)

# TimeSeriesSplit for cross-validation
tscv = TimeSeriesSplit(n_splits=5)

# Define models for ensemble with improved hyperparameters
clf1 = RandomForestClassifier(
    n_estimators=200,  # Increased number of trees
    max_depth=25,  # Increased depth of trees
    min_samples_split=3,  # Reduced to allow more splits
    min_samples_leaf=2,  # Reduced to allow smaller leaves
    max_features='sqrt',  # Same setting as the earlier run
    random_state=0,
    max_leaf_nodes=20,  # Increased number of leaf nodes
    bootstrap=True,
    oob_score=True,
    ccp_alpha=0.005,  # Reduced complexity parameter
    class_weight={0: 10, 1: 20}  # Increased class weight for positive class
)

clf2 = GradientBoostingClassifier(
    n_estimators=200,  # Increased number of boosting stages
    learning_rate=0.05,  # Reduced learning rate for finer updates
    max_depth=4,  # Increased depth for more complex interactions
    random_state=0
)

clf3 = LogisticRegression(
    C=0.01,  # Regularization parameter
    solver='liblinear'  # Solver for small datasets and binary classification
)

# Ensemble model - Voting Classifier
ensemble_clf = VotingClassifier(estimators=[
    ('rf', clf1),
    ('gb', clf2),
    ('lr', clf3)
], voting='soft')  # Use 'soft' voting for better probability estimates

# Feature matrix = every column except the last; label = 'b_flag'.
# Row order is left untouched so the time-ordered split stays meaningful.
X = X_df.iloc[:, :-1].values
y = X_df['b_flag'].values

# Initialize variables for aggregating results and tracking the best model
overall_precision_sum_train = 0
overall_recall_sum_train = 0
overall_f1_sum_train = 0
overall_precision_sum_test = 0
overall_recall_sum_test = 0
overall_f1_sum_test = 0
best_f1_score = -np.inf
best_model_fold = None
best_model = None
best_test_predictions = None
best_test_thresholded_predictions = None
best_test_indices = None
split_num = 0  # Number of folds processed so far

# Define BreakEvenRatio; it is only printed and stored alongside the fold metrics
break_even_ratio = 0.5  # Example, adjust according to your specific risk/reward ratio

# Lists to store metrics for each fold
fold_metrics = []

import copy  # stdlib; used to snapshot the fitted ensemble of the best fold

# Cross-validation loop: scale, fit, evaluate and persist one model per fold.
for fold, (train_index, test_index) in enumerate(tscv.split(X_df)):
    print(f"Fold {fold + 1}")

    # Train / test data for this fold
    x_train, y_train = X[train_index], y[train_index]
    x_test, y_test = X[test_index], y[test_index]

    # Scale Data: the scaler is fitted on the training part only
    sc_mt = StandardScaler()
    x_train = sc_mt.fit_transform(x_train)
    x_test = sc_mt.transform(x_test)

    # Save scaler
    dump(sc_mt, f'GBPUSD_D1_3112_BuyNEW/scaler_fold_{fold + 1}.joblib')

    # Train Ensemble Model (the same estimator object is refit on every fold)
    ensemble_clf.fit(x_train, y_train)

    # Save the trained model
    dump(ensemble_clf, f'GBPUSD_D1_3112_BuyNEW/model_fold_{fold + 1}.joblib')

    # Predict on training data
    y_train_pred = ensemble_clf.predict(x_train)
    y_train_prob = ensemble_clf.predict_proba(x_train)[:, 1]

    # Predict on testing data
    y_test_pred = ensemble_clf.predict(x_test)
    y_test_prob = ensemble_clf.predict_proba(x_test)[:, 1]

    # Apply Custom Threshold on Testing Data
    # NOTE(review): the threshold is chosen using y_test itself, so the
    # thresholded predictions are not an out-of-sample estimate.
    custom_threshold = adjust_threshold(y_test_prob, y_test)
    y_test_pred_thresholded = (y_test_prob >= custom_threshold).astype(int)

    # Calculate metrics for training and testing
    precision_train = precision_score(y_train, y_train_pred)
    recall_train = recall_score(y_train, y_train_pred)
    f1_train = f1_score(y_train, y_train_pred)

    precision_test = precision_score(y_test, y_test_pred)
    recall_test = recall_score(y_test, y_test_pred)
    f1_test = f1_score(y_test, y_test_pred)
    auc_test = roc_auc_score(y_test, y_test_prob)

    # Print metrics for this fold
    print(f"Fold {fold + 1} Training Metrics:")
    print(f"Precision: {precision_train:.4f}, Recall: {recall_train:.4f}, F1 Score: {f1_train:.4f}")

    print(f"Fold {fold + 1} Testing Metrics:")
    print(f"Precision: {precision_test:.4f}, Recall: {recall_test:.4f}, F1 Score: {f1_test:.4f}, AUC: {auc_test:.4f}")
    print(f"Custom Threshold: {custom_threshold:.2f}, BreakEvenRatio: {break_even_ratio:.2f}")

    # Track the best model based on F1 score
    if f1_test > best_f1_score:
        best_f1_score = f1_test
        best_model_fold = fold + 1
        # Snapshot the fitted estimator: ensemble_clf is refit on the next
        # fold, so keeping a plain reference would always end up pointing at
        # the last fold's model instead of the best one.
        best_model = copy.deepcopy(ensemble_clf)
        best_test_predictions = y_test_pred
        best_test_thresholded_predictions = y_test_pred_thresholded
        best_test_indices = test_index

    # Aggregate metrics across folds
    overall_precision_sum_train += precision_train
    overall_recall_sum_train += recall_train
    overall_f1_sum_train += f1_train

    overall_precision_sum_test += precision_test
    overall_recall_sum_test += recall_test
    overall_f1_sum_test += f1_test

    # Store metrics for this fold
    fold_metrics.append({
        'Fold': fold + 1,
        'Precision_Train': precision_train,
        'Recall_Train': recall_train,
        'F1_Train': f1_train,
        'Precision_Test': precision_test,
        'Recall_Test': recall_test,
        'F1_Test': f1_test,
        'AUC_Test': auc_test,
        'Custom_Threshold': custom_threshold,
        'BreakEvenRatio': break_even_ratio
    })

    split_num += 1

# Average the accumulated metrics over the number of processed folds
overall_precision_train, overall_recall_train, overall_f1_train = (
    metric_sum / split_num
    for metric_sum in (overall_precision_sum_train, overall_recall_sum_train, overall_f1_sum_train)
)
overall_precision_test, overall_recall_test, overall_f1_test = (
    metric_sum / split_num
    for metric_sum in (overall_precision_sum_test, overall_recall_sum_test, overall_f1_sum_test)
)

# Persist the model tracked as best and report where it went
best_model_path = f'GBPUSD_D1_3112_BuyNEW/best_model_fold_{best_model_fold}.joblib'
dump(best_model, best_model_path)
print(f"The best model was from fold {best_model_fold} with an F1 score of {best_f1_score:.4f}")
print(f"Model saved to {best_model_path}")

# Predictions of the best fold, indexed by the positional test indices
df_pred = pd.DataFrame(
    {
        'prediction': best_test_predictions,
        'prediction_thresholded': best_test_thresholded_predictions,
        'actual': y[best_test_indices],
    },
    index=best_test_indices,
)
df_pred.to_csv('GBPUSD_D1_3112_BuyNEW/best_model_predictions.csv', index=True)

# Averaged metrics, training first and then testing
for heading, (avg_precision, avg_recall, avg_f1) in (
    ('Overall Training Data Metrics:', (overall_precision_train, overall_recall_train, overall_f1_train)),
    ('Overall Testing Data Metrics:', (overall_precision_test, overall_recall_test, overall_f1_test)),
):
    print(heading)
    print(f'Precision: {avg_precision:.4f}, Recall: {avg_recall:.4f}, F1 Score: {avg_f1:.4f}')

# One row per fold, written without the index column
df_fold_metrics = pd.DataFrame(fold_metrics)
df_fold_metrics.to_csv('GBPUSD_D1_3112_BuyNEW/fold_metrics.csv', index=False)


Fold 1
Fold 1 Training Metrics:
Precision: 0.8795, Recall: 0.9818, F1 Score: 0.9278
Fold 1 Testing Metrics:
Precision: 0.6604, Recall: 0.8061, F1 Score: 0.7260, AUC: 0.7732
Custom Threshold: 0.50, BreakEvenRatio: 0.50
Fold 2
Fold 2 Training Metrics:
Precision: 0.8233, Recall: 0.9440, F1 Score: 0.8795
Fold 2 Testing Metrics:
Precision: 0.6301, Recall: 0.7987, F1 Score: 0.7044, AUC: 0.7861
Custom Threshold: 0.50, BreakEvenRatio: 0.50
Fold 3
Fold 3 Training Metrics:
Precision: 0.7831, Recall: 0.9012, F1 Score: 0.8380
Fold 3 Testing Metrics:
Precision: 0.6190, Recall: 0.7696, F1 Score: 0.6861, AUC: 0.7511
Custom Threshold: 0.50, BreakEvenRatio: 0.50
Fold 4
Fold 4 Training Metrics:
Precision: 0.7604, Recall: 0.8807, F1 Score: 0.8161
Fold 4 Testing Metrics:
Precision: 0.6753, Recall: 0.7305, F1 Score: 0.7018, AUC: 0.7319
Custom Threshold: 0.40, BreakEvenRatio: 0.50
Fold 5
Fold 5 Training Metrics:
Precision: 0.7446, Recall: 0.8784, F1 Score: 0.8060
Fold 5 Testing Metrics:
Precision: 0.5818, R