In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, precision_score, accuracy_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import TimeSeriesSplit
from joblib import dump
import os
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE

# Counters for false/true positives.
# NOTE(review): they are initialised here but never updated in this cell --
# confirm whether another cell relies on them.
sum_fp = 0
sum_tp = 0

# TimeSeriesSplit for cross-validation (5 folds; splits are made by row
# position, without shuffling).
tscv = TimeSeriesSplit(n_splits=5)

# Directory to save model and scaler.  Every artefact below is written to
# this same directory: creating one directory and dumping into a different,
# never-created one makes joblib.dump fail with FileNotFoundError.
output_dir = 'Dump_GBPUSD_D1_3112_Buy'
os.makedirs(output_dir, exist_ok=True)

# Initialise RandomForestClassifier with fixed hyperparameters
rf_classifier_mt = RandomForestClassifier(
    n_estimators=200,
    max_depth=40,
    min_samples_split=2,
    min_samples_leaf=2,
    max_features='sqrt',
    random_state=0,
    max_leaf_nodes=20,
    bootstrap=True,
    oob_score=True,
    ccp_alpha=0,
    class_weight={0: 10, 1: 15}  # class 1 is weighted 1.5x class 0
)

# Features are every column except the last one; the target is 'b_flag'.
# NOTE(review): this assumes 'b_flag' is the LAST column of X_df (otherwise
# the label leaks into the features) and that the rows are already in
# chronological order -- nothing here sorts them.  Confirm upstream.
X = X_df.iloc[:, :-1].values
y = X_df['b_flag'].values


def _print_split_report(split_name, false_positives, true_positives,
                        precision, accuracy, recall, f1, roc_auc,
                        break_even_ratio):
    """Print the metric summary for one data split of one fold.

    Args:
        split_name: Label used in the heading, e.g. 'Training' or 'Testing'.
        false_positives: Count taken from confusion-matrix cell [0][1].
        true_positives: Count taken from confusion-matrix cell [1][1].
        precision, accuracy, recall, f1, roc_auc: Already-computed scores.
        break_even_ratio: Value subtracted from the precision to obtain the
            'WIN/LOSS-Diff' line.
    """
    predicted_positives = false_positives + true_positives
    # With zero predicted positives the ratio is 0/0; report nan explicitly
    # instead of triggering a NumPy divide RuntimeWarning.
    if predicted_positives:
        ratio_total = round(100 * (true_positives / predicted_positives), 2)
    else:
        ratio_total = float('nan')

    print(f'{split_name} Data Results:')
    print('WIN/LOSS-Diff:', round(100 * (precision - break_even_ratio), 2), '%')
    print('False Positives:', false_positives)
    print('True Positives:', true_positives)
    print('Precision:', precision)
    print('Accuracy:', accuracy)
    print('Recall:', recall)
    print('F1 Score:', f1)
    print('ROC AUC:', roc_auc)
    print('Ratio Total:', ratio_total)
    print('BreakEvenRatio:', round(break_even_ratio, 2))
    print('____________________________________________________________________________________________________________________________')


for fold, (train_index, test_index) in enumerate(tscv.split(X_df)):
    print(f"Fold {fold + 1}")

    # Train data
    x_train, y_train = X[train_index], y[train_index]
    # Test data
    x_test, y_test = X[test_index], y[test_index]

    # Scale Data: a fresh scaler is fitted on the training rows of this fold
    # only, then applied unchanged to the test rows.
    sc_mt = StandardScaler()
    x_train = sc_mt.fit_transform(x_train)
    x_test = sc_mt.transform(x_test)

    # Save scaler
    dump(sc_mt, f'{output_dir}/scaler_fold_{fold + 1}.joblib')

    # Train Model (the same estimator object is re-fitted on every fold)
    rf_classifier_mt.fit(x_train, y_train)

    # Save the trained model as fitted on this fold
    dump(rf_classifier_mt, f'{output_dir}/model_fold_{fold + 1}.joblib')

    # Predict on training data
    y_train_pred = rf_classifier_mt.predict(x_train)

    print("Confusion Matrix (Training Data):")
    cm_train = confusion_matrix(y_train, y_train_pred)
    print(cm_train)

    train_false_positives = cm_train[0][1]
    train_true_positives = cm_train[1][1]

    train_precision = precision_score(y_train, y_train_pred)
    train_accuracy = accuracy_score(y_train, y_train_pred)
    train_recall = recall_score(y_train, y_train_pred)
    train_f1 = f1_score(y_train, y_train_pred)
    # ROC AUC is computed from the predicted probability of class 1
    train_roc_auc = roc_auc_score(y_train, rf_classifier_mt.predict_proba(x_train)[:, 1])

    _print_split_report(
        'Training',
        train_false_positives,
        train_true_positives,
        train_precision,
        train_accuracy,
        train_recall,
        train_f1,
        train_roc_auc,
        BreakEvenRatio,
    )

    # Predict on testing data
    y_test_pred = rf_classifier_mt.predict(x_test)

    print("Confusion Matrix (Testing Data):")
    cm_test = confusion_matrix(y_test, y_test_pred)
    print(cm_test)

    test_false_positives = cm_test[0][1]
    test_true_positives = cm_test[1][1]

    test_precision = precision_score(y_test, y_test_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    test_recall = recall_score(y_test, y_test_pred)
    test_f1 = f1_score(y_test, y_test_pred)
    test_roc_auc = roc_auc_score(y_test, rf_classifier_mt.predict_proba(x_test)[:, 1])

    _print_split_report(
        'Testing',
        test_false_positives,
        test_true_positives,
        test_precision,
        test_accuracy,
        test_recall,
        test_f1,
        test_roc_auc,
        BreakEvenRatio,
    )


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, precision_score, accuracy_score, recall_score, f1_score, roc_auc_score
from joblib import dump
import os
import pandas as pd
import numpy as np

# Counters for false/true positives, reset whenever this cell runs.
sum_fp = sum_tp = 0


def _show_results(heading, precision, accuracy, recall, f1, roc_auc,
                  false_pos, true_pos):
    """Print the metric summary for one data split.

    Args:
        heading: First line printed, e.g. 'Training Data Results:'.
        precision, accuracy, recall, f1, roc_auc: Already-computed scores.
        false_pos: Count taken from confusion-matrix cell [0][1].
        true_pos: Count taken from confusion-matrix cell [1][1].

    Reads the module-level ``BreakEvenRatio`` (defined outside this cell).
    """
    print(heading)
    print('WIN/LOSS-Diff:', round(100 * (precision - BreakEvenRatio), 2), '%')
    print('False Positives:', false_pos)
    print('True Positives:', true_pos)
    print('Precision:', precision)
    print('Accuracy:', accuracy)
    print('Recall:', recall)
    print('F1 Score:', f1)
    print('ROC AUC:', roc_auc)
    print('Ratio Total:', round(100 * (true_pos / (false_pos + true_pos)), 2))
    print('BreakEvenRatio:', round(BreakEvenRatio, 2))
    print('____________________________________________________________________________________________________________________________')


# Positional hold-out split (no shuffling): the first 80% of the rows are
# used for training, the remaining rows for testing.
split = int(0.80 * len(X_df))
train_data = X_df.iloc[:split]
test_data = X_df.iloc[split:]

# Features: every column except the last one.
x_train = train_data.iloc[:, :-1].values
x_test = test_data.iloc[:, :-1].values
# Target: the 'b_flag' column.
y_train = train_data['b_flag'].values
y_test = test_data['b_flag'].values

# Fit the scaler on the training rows only, then apply it to both splits.
sc_mt = StandardScaler().fit(x_train)
x_train = sc_mt.transform(x_train)
x_test = sc_mt.transform(x_test)

# Persist the fitted scaler.
os.makedirs('Dump_GBPUSD_D1_3112_Buy', exist_ok=True)
dump(sc_mt, 'Dump_GBPUSD_D1_3112_Buy/scaler.joblib')

# Fixed RandomForest hyperparameters, collected in one place.
rf_params = {
    'n_estimators': 100,
    'max_depth': 10,
    'min_samples_split': 5,
    'min_samples_leaf': 2,
    'max_features': 'sqrt',
    'random_state': 42,
}
rf_classifier_mt = RandomForestClassifier(**rf_params)

# Train on the scaled training rows and persist the fitted model.
rf_classifier_mt.fit(x_train, y_train)
dump(rf_classifier_mt, 'Dump_GBPUSD_D1_3112_Buy/model.joblib')

# ---- Training-set evaluation ----
y_train_pred = rf_classifier_mt.predict(x_train)

print("Confusion Matrix (Training Data):")
cm_train = confusion_matrix(y_train, y_train_pred)
print(cm_train)

train_false_positives = cm_train[0, 1]
train_true_positives = cm_train[1, 1]

train_precision = precision_score(y_train, y_train_pred)
train_accuracy = accuracy_score(y_train, y_train_pred)
train_recall = recall_score(y_train, y_train_pred)
train_f1 = f1_score(y_train, y_train_pred)
# ROC AUC uses the predicted probability of class 1.
train_scores = rf_classifier_mt.predict_proba(x_train)[:, 1]
train_roc_auc = roc_auc_score(y_train, train_scores)

_show_results(
    'Training Data Results:',
    precision=train_precision,
    accuracy=train_accuracy,
    recall=train_recall,
    f1=train_f1,
    roc_auc=train_roc_auc,
    false_pos=train_false_positives,
    true_pos=train_true_positives,
)

# ---- Test-set evaluation ----
y_test_pred = rf_classifier_mt.predict(x_test)

print("Confusion Matrix (Testing Data):")
cm_test = confusion_matrix(y_test, y_test_pred)
print(cm_test)

test_false_positives = cm_test[0, 1]
test_true_positives = cm_test[1, 1]

test_precision = precision_score(y_test, y_test_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_recall = recall_score(y_test, y_test_pred)
test_f1 = f1_score(y_test, y_test_pred)
test_scores = rf_classifier_mt.predict_proba(x_test)[:, 1]
test_roc_auc = roc_auc_score(y_test, test_scores)

_show_results(
    'Testing Data Results:',
    precision=test_precision,
    accuracy=test_accuracy,
    recall=test_recall,
    f1=test_f1,
    roc_auc=test_roc_auc,
    false_pos=test_false_positives,
    true_pos=test_true_positives,
)
