In [49]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score, mean_squared_error, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

## Read the CSV 


In [None]:
# Location of the EURUSD PERIOD_H1 CSV inside the Kaggle input directory.
EURUSD_H1_CSV = "/kaggle/input/eurusd-period-h1/EURUSD.PERIOD_H1.csv"
df = pd.read_csv(EURUSD_H1_CSV)

# Preview the first ten rows as the cell output.
df.head(n=10)

Split the data into training and test sets

In [51]:
# Separate the feature matrix from the "Signal" target column.
target_column = "Signal"
X = df.drop(columns=[target_column]).values
y = df[target_column].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): train_test_split shuffles rows by default. If the rows are ordered
# in time this can leak future information into training - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Install XGBoost 

In [None]:
!pip install xgboost

Let's import XGBoost and the Pipeline class used to wrap the models

In [53]:
import xgboost as xgb
from sklearn.pipeline import Pipeline

Define the XGBoost parameters

In [54]:
# Hyper-parameters handed to xgb.XGBClassifier.
params = dict(
    objective='binary:logistic',    # binary classification
    learning_rate=0.05,             # learning rate
    max_depth=5,                    # maximum depth of a tree
    n_estimators=100,               # number of trees
    colsample_bytree=0.9,           # column subsample ratio used when building each tree
    subsample=0.9,                  # subsample ratio of the training instances
    eval_metric=['auc', 'logloss'], # evaluation metrics
)

In [None]:
# Two-step pipeline: standardize the features, then classify with XGBoost
# configured from the `params` dictionary.
xgb_steps = [
    ("scaler", StandardScaler()),
    ("xgb", xgb.XGBClassifier(**params)),
]
pipe = Pipeline(steps=xgb_steps)

# Train the scaler and the classifier together on the training split.
pipe.fit(X_train, y_train)

# Testing XGBoost model

In [None]:
# Pipeline.predict on a classifier returns class labels, not probabilities,
# so no rounding/thresholding step is needed.
y_pred = pipe.predict(X_test)

# Kept under its original name so anything that refers to it keeps working.
y_pred_binary = y_pred

# Confusion matrix, drawn on its own figure/axes instead of implicit pyplot state.
cm = confusion_matrix(y_test, y_pred_binary)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
fig.savefig("confusion-matrix xgboost")  # Write the heatmap to disk


print("Classification Report\n",
      classification_report(y_test, y_pred_binary))

## Necessary imports for converting a scikit-learn model and saving it in ONNX format

Installing the necessary libraries first

In [None]:
!pip install onnxmltools onnxruntime onnx skl2onnx

In [58]:
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, to_onnx, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
from onnxmltools.convert import convert_xgboost as convert_xgboost_booster

Registering the Converter

In [59]:
# Converter options the XGBoost classifier is registered with.
xgb_converter_options = {"nocl": [False], "zipmap": [True, False, "columns"]}

# Register XGBClassifier with skl2onnx, pairing the linear-classifier shape
# calculator with the onnxmltools XGBoost operator converter.
update_registered_converter(
    xgb.XGBClassifier, "XGBClassifier",
    calculate_linear_classifier_output_shapes, convert_xgboost,
    options=xgb_converter_options,
)

Converting & Saving XGBoost model to onnx

In [60]:
# One float tensor input: unspecified row count, one column per training feature.
initial_types = [("input", FloatTensorType([None, X_train.shape[1]]))]
opsets = {"": 12, "ai.onnx.ml": 2}
model_onnx = convert_sklearn(pipe, "pipeline_xgboost", initial_types, target_opset=opsets)

# Serialize the converted pipeline and write it to disk.
onnx_bytes = model_onnx.SerializeToString()
with open("xgboost.eurusd.h1.onnx", "wb") as onnx_file:
    onnx_file.write(onnx_bytes)

# Install Light GBM 
LightGBM doesn't ship with scikit-learn; install it separately if it isn't already available.

In [None]:
!pip install lightgbm

In [62]:
import lightgbm as lgb

## Set parameters for Light GBM Model

In [63]:
# Hyper-parameters handed to lgb.LGBMClassifier.
params = dict(
    boosting_type='gbdt',               # gradient boosting decision tree
    objective='binary',                 # binary classification (use 'regression' for regression tasks)
    metric=['auc', 'binary_logloss'],   # evaluation metrics
    num_leaves=25,                      # number of leaves in one tree
    n_estimators=100,                   # number of trees
    max_depth=5,
    learning_rate=0.05,                 # learning rate
    feature_fraction=0.9,               # fraction of features used for each boosting round
)

## Train Light GBM model in a Pipeline

In [None]:
# Two-step pipeline: standardize the features, then classify with LightGBM
# configured from the `params` dictionary.
lgbm_steps = [
    ("scaler", StandardScaler()),
    ("lgbm", lgb.LGBMClassifier(**params)),
]
pipe = Pipeline(steps=lgbm_steps)

# Train the scaler and the classifier together on the training split.
pipe.fit(X_train, y_train)

## Make Predictions and Evaluate

After training the model, use it to make predictions and evaluate its performance.

In [None]:
# Pipeline.predict on a classifier returns class labels, not probabilities,
# so no rounding/thresholding step is needed.
y_pred = pipe.predict(X_test)

# Kept under its original name so anything that refers to it keeps working.
y_pred_binary = y_pred

# Confusion matrix, drawn on its own figure/axes instead of implicit pyplot state.
cm = confusion_matrix(y_test, y_pred_binary)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
fig.savefig("confusion-matrix lightgbm")  # Write the heatmap to disk


print("Classification Report\n",
      classification_report(y_test, y_pred_binary))

# Save LightGBM to ONNX format

In [66]:
import onnxmltools
from onnxmltools.convert import convert_lightgbm
import onnxmltools.convert.common.data_types
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter


from skl2onnx.common.shape_calculator import (
    calculate_linear_classifier_output_shapes,
)  # noqa

from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
    convert_lightgbm,
)  # noqa

Let's register the converter

In [67]:
# Converter options the LightGBM classifier is registered with.
lgbm_converter_options = {"nocl": [False], "zipmap": [True, False, "columns"]}

# Register LGBMClassifier with skl2onnx, pairing the linear-classifier shape
# calculator with the onnxmltools LightGBM operator converter.
update_registered_converter(
    lgb.LGBMClassifier, "GBMClassifier",
    calculate_linear_classifier_output_shapes, convert_lightgbm,
    options=lgbm_converter_options,
)

Finally let's convert Light GBM model to onnx format

In [68]:
# One float tensor input: unspecified row count, one column per training feature.
initial_types = [("input", FloatTensorType([None, X_train.shape[1]]))]
opsets = {"": 12, "ai.onnx.ml": 2}
model_onnx = convert_sklearn(pipe, "pipeline_lightgbm", initial_types, target_opset=opsets)

# Serialize the converted pipeline and write it to disk.
onnx_bytes = model_onnx.SerializeToString()
with open("lightgbm.eurusd.h1.onnx", "wb") as onnx_file:
    onnx_file.write(onnx_bytes)

# Other Models

In [69]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

Instantiate the models we are going to use

In [70]:
# Display name -> unfitted estimator, in the order they will be evaluated.
classifiers = dict([
    ("Logistic Regression", LogisticRegression(solver="lbfgs")),
    ("Decision Tree", DecisionTreeClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Support Vector Machine", SVC()),
])

Let's normalize the input data first

In [71]:
# Learn the scaling statistics from the training features only...
scaler = StandardScaler()
scaler.fit(X_train)

# ...then apply the same transformation to both splits.
x_train, x_test = (scaler.transform(split) for split in (X_train, X_test))

Run the same data across different models

In [None]:
# Train every classifier on the same standardized data and report its test-set performance.

for classifier_name, classifier in classifiers.items():

    # Fit on the scaled training split, then predict labels for the scaled test split.
    classifier.fit(x_train, y_train)
    y_pred = classifier.predict(x_test)

    print(f"{classifier_name}\nClassification Report\n",classification_report(y_test, y_pred))

    # One figure per classifier; the title names the model so the plots can be told apart.
    fig, ax = plt.subplots()
    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt="d", ax=ax)  # 'd' formats entries as integers
    ax.set_title(f"Confusion Matrix - {classifier_name}")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    plt.show()  # Display the heatmap

In [5]:
import os
import pandas as pd
# Load the buy/sell trade datasets from the MetaQuotes tester agent "Files" folder.
# The folder is machine-specific, so it can be overridden with the
# MT5_TESTER_FILES_DIR environment variable; the original location stays the default.
tester_files_folder = os.environ.get(
    "MT5_TESTER_FILES_DIR",
    "C:/Users/Administrador/AppData/Roaming/MetaQuotes/Tester/6C3C6A11D1C3791DD4DBF45421BF8028/Agent-127.0.0.1-3000/MQL5/Files",
)
df_buy = pd.read_csv(os.path.join(tester_files_folder, "buy_dataset.csv"))
df_sell = pd.read_csv(os.path.join(tester_files_folder, "sell_dataset.csv"))

# One row per trade; every column except one is counted as a feature.
print(f"Buy -> Trades: {df_buy.shape[0]} Features: {df_buy.shape[1]-1}")
print(f"Sell -> Trades: {df_sell.shape[0]} Features: {df_sell.shape[1]-1}")

Buy -> Trades: 1242 Features: 60
Sell -> Trades: 650 Features: 60


In [6]:
# Show the buy dataset as the cell output (the rendered table elides the middle rows and columns).
df_buy

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_51,data_52,data_53,data_54,data_55,data_56,data_57,data_58,data_59,target
0,-0.008586,9.738929,-0.539064,29.078246,14.714286,87.0,102809.0,57.792387,41.699383,-8.988015,...,79.482239,15.928571,212.0,103041.0,62.051991,63.705119,7.134730,-0.727189,1.455577,1.0
1,-0.000746,7.330428,3.321572,36.893935,6.714286,29.0,102456.0,19.557290,0.826238,-2.040041,...,87.438912,6.714286,23.0,102588.0,23.661369,8.940561,5.144431,-0.916203,-1.176265,0.0
2,-0.011963,9.966167,8.083067,42.162964,12.214286,40.0,71609.0,31.099196,15.606083,-7.796756,...,50.960114,13.500000,137.0,71733.0,47.065486,30.606715,5.183702,1.008717,0.267206,1.0
3,0.000000,8.820760,3.554822,64.246859,5.428571,26.0,90883.0,16.292329,10.719201,-2.983325,...,79.048642,5.928571,76.0,90880.0,14.191547,14.349443,1.685243,0.551618,0.867768,1.0
4,-0.000503,9.575029,1.179892,67.630000,14.142857,81.0,98988.0,45.611841,41.473199,-6.864313,...,74.307991,14.571429,160.0,99161.0,59.566434,61.653348,16.694548,0.131760,-1.706633,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1237,-0.001198,8.973375,8.402857,49.838558,7.142857,18.0,87193.0,28.269418,35.042958,-3.142502,...,77.938742,8.500000,80.0,87128.0,23.828554,43.696130,6.093501,-0.235514,-1.963223,0.0
1238,-0.001512,9.883676,3.528330,27.935236,10.364286,92.0,99196.0,30.160975,20.623191,-6.181450,...,52.506523,11.078571,160.0,99467.0,56.881530,28.947371,-2.228932,0.669108,2.052986,1.0
1239,-0.000472,9.805341,11.874773,47.833554,10.000000,44.0,89358.0,66.768555,16.415273,-1.674081,...,52.766809,10.500000,164.0,89914.0,81.925820,26.226915,9.084762,-0.959213,1.428714,1.0
1240,-0.003804,0.175240,4.691623,12.751968,13.142857,240.0,62936.0,59.052180,-22.332687,-22.399849,...,23.529003,12.785714,124.0,63512.0,45.541629,1.272303,-15.390256,-0.548784,0.560731,0.0


In [7]:
# Show the sell dataset as the cell output (the rendered table elides the middle rows and columns).
df_sell

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_51,data_52,data_53,data_54,data_55,data_56,data_57,data_58,data_59,target
0,0.009418,90.009126,-5.231879,72.776621,28.414286,439.0,74107.0,75.370222,-68.872468,14.732238,...,58.921718,26.057143,685.0,71870.0,81.074820,-104.128783,-9.220122,-0.614774,0.858057,1.0
1,-0.000865,90.221820,0.601281,47.817315,11.585714,217.0,98114.0,28.917891,14.705073,15.012136,...,65.868748,9.735714,112.0,98021.0,24.567711,3.625553,4.697599,-0.275744,-1.595616,0.0
2,0.000000,90.155739,7.832926,32.980141,8.500000,55.0,86336.0,36.364219,-64.233756,4.652681,...,29.450919,7.357143,69.0,86431.0,52.908748,-73.119251,-2.969040,-0.347275,-2.812220,1.0
3,0.000743,90.151681,-9.663004,21.655329,9.971429,117.0,105185.0,48.416130,-28.448724,1.919378,...,26.927074,10.742857,126.0,104884.0,51.288123,-37.106053,-11.704643,0.384893,-0.110540,1.0
4,0.004514,90.255997,-5.763775,74.082779,9.185714,1.0,96213.0,30.392920,5.319633,7.945322,...,73.194358,9.542857,134.0,95952.0,30.119622,-9.295636,-1.717865,-0.602106,0.378726,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
645,0.002264,93.279063,-8.362304,51.828357,11.178571,44.0,95118.0,60.637266,-38.657541,0.136052,...,19.305708,11.214286,109.0,94931.0,55.642182,-49.447976,-15.058467,0.457634,-2.732850,1.0
646,0.000394,90.719384,-1.641806,62.049104,7.214286,23.0,96323.0,18.411953,-5.538679,3.862814,...,48.658012,7.000000,67.0,96174.0,18.499730,-17.906739,-6.944302,1.016146,2.000000,1.0
647,0.004817,90.081662,-2.831765,26.542164,13.542857,76.0,71310.0,34.219433,-74.592807,10.879055,...,26.032820,10.857143,63.0,71271.0,28.394443,-92.289800,-7.869495,1.062092,0.251214,1.0
648,0.047876,90.091776,10.443535,47.306116,13.092857,320.0,86493.0,56.330919,20.423922,26.620835,...,36.062930,9.378571,78.0,85668.0,70.003643,-12.888360,-2.898352,0.383089,-0.897826,0.0
