In [49]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score, mean_squared_error, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

## Read the CSV 


In [None]:
# Location of the hourly EURUSD dataset inside the Kaggle input directory.
eurusd_csv_path = "/kaggle/input/eurusd-period-h1/EURUSD.PERIOD_H1.csv"

# Load the CSV into a DataFrame and preview its first ten rows.
df = pd.read_csv(eurusd_csv_path)
df.head(10)

Split the data into training and test sets

In [51]:
# Separate the target column from the remaining columns, which become the features.
target_column = "Signal"
X = df.drop(columns=[target_column]).values
y = df[target_column].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): train_test_split shuffles rows by default. If row order carries
# meaning (e.g. chronological bars), confirm that a shuffled split is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Install XGBoost 

In [None]:
!pip install xgboost

Let's import XGBoost and the scikit-learn `Pipeline` class used to chain the scaler and the model

In [53]:
import xgboost as xgb
from sklearn.pipeline import Pipeline

Define the XGBoost parameters

In [54]:
# Hyper-parameters for the XGBoost classifier.
params = dict(
    objective='binary:logistic',     # binary classification
    learning_rate=0.05,              # step size applied at each boosting round
    max_depth=5,                     # depth limit of each tree
    n_estimators=100,                # number of boosted trees
    colsample_bytree=0.9,            # share of columns sampled per tree
    subsample=0.9,                   # share of training rows sampled per tree
    eval_metric=['auc', 'logloss'],  # metrics reported during evaluation
)

In [None]:
# Two-step pipeline: standardise the features, then classify with XGBoost
# configured from `params`.
xgb_steps = [
    ("scaler", StandardScaler()),
    ("xgb", xgb.XGBClassifier(**params)),
]
pipe = Pipeline(steps=xgb_steps)

# Train the whole pipeline on the training split.
pipe.fit(X_train, y_train)

# Testing XGBoost model

In [None]:
# Evaluate the fitted pipeline on the held-out test split.
# Pipeline.predict() delegates to the final classifier's predict(), which returns
# hard class labels (not probabilities), so no rounding/thresholding is required.
y_pred = pipe.predict(X_test)

# Kept under its original name so any later code that reads it keeps working.
y_pred_binary = y_pred

# Confusion matrix, drawn on an explicit figure/axes so it can never land on
# axes left over from an earlier plot.
cm = confusion_matrix(y_test, y_pred_binary)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
fig.savefig("confusion-matrix xgboost")  # write the heatmap to an image file


print("Classification Report\n",
      classification_report(y_test, y_pred_binary))

## Necessary imports for converting a scikit-learn pipeline to ONNX and saving it in ONNX format

Installing the necessary libraries first

In [None]:
!pip install onnxmltools onnxruntime onnx skl2onnx

In [58]:
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, to_onnx, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
from onnxmltools.convert import convert_xgboost as convert_xgboost_booster

Registering the Converter

In [59]:
# Options the registered converter accepts, with their allowed values.
xgb_converter_options = {"nocl": [False], "zipmap": [True, False, "columns"]}

# Register XGBClassifier with skl2onnx: which shape calculator and which
# conversion function to use when this estimator appears in a pipeline.
update_registered_converter(
    model=xgb.XGBClassifier,
    alias="XGBClassifier",
    shape_fct=calculate_linear_classifier_output_shapes,
    convert_fct=convert_xgboost,
    options=xgb_converter_options,
)

Converting & Saving XGBoost model to onnx

In [60]:
# Declare the model input: a float tensor with a dynamic first dimension and
# one column per feature of the training matrix.
n_features = X_train.shape[1]
initial_types = [("input", FloatTensorType([None, n_features]))]

# Convert the fitted pipeline to an ONNX model.
model_onnx = convert_sklearn(
    pipe,
    name="pipeline_xgboost",
    initial_types=initial_types,
    target_opset={"": 12, "ai.onnx.ml": 2},
)

# Serialise the ONNX model and write it to disk.
with open("xgboost.eurusd.h1.onnx", "wb") as onnx_file:
    onnx_file.write(model_onnx.SerializeToString())

# Install Light GBM 
LightGBM is not bundled with scikit-learn, so it must be installed separately if it is not already available.

In [None]:
!pip install lightgbm

In [62]:
import lightgbm as lgb

## Set parameters for Light GBM Model

In [63]:
# Hyper-parameters for the LightGBM classifier.
params = dict(
    boosting_type='gbdt',               # gradient boosted decision trees
    objective='binary',                 # binary classification
    metric=['auc', 'binary_logloss'],   # evaluation metrics
    num_leaves=25,                      # leaves per tree
    n_estimators=100,                   # number of trees
    max_depth=5,
    learning_rate=0.05,                 # step size applied at each boosting round
    feature_fraction=0.9,               # share of features used in each boosting round
)

## Train Light GBM model in a Pipeline

In [None]:
# Two-step pipeline: standardise the features, then classify with LightGBM
# configured from `params`.
lgbm_steps = [
    ("scaler", StandardScaler()),
    ("lgbm", lgb.LGBMClassifier(**params)),
]
pipe = Pipeline(steps=lgbm_steps)

# Train the whole pipeline on the training split.
pipe.fit(X_train, y_train)

## Make Predictions and Evaluate

After training the model, use it to make predictions and evaluate its performance.

In [None]:
# Evaluate the fitted pipeline on the held-out test split.
# Pipeline.predict() delegates to the final classifier's predict(), which returns
# hard class labels (not probabilities), so no rounding/thresholding is required.
y_pred = pipe.predict(X_test)

# Kept under its original name so any later code that reads it keeps working.
y_pred_binary = y_pred

# Confusion matrix, drawn on an explicit figure/axes so it can never land on
# axes left over from an earlier plot.
cm = confusion_matrix(y_test, y_pred_binary)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
fig.savefig("confusion-matrix lightgbm")  # write the heatmap to an image file


print("Classification Report\n",
      classification_report(y_test, y_pred_binary))

# Save LightGBM to ONNX format

In [66]:
import onnxmltools
from onnxmltools.convert import convert_lightgbm
import onnxmltools.convert.common.data_types
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter


from skl2onnx.common.shape_calculator import (
    calculate_linear_classifier_output_shapes,
)  # noqa

from onnxmltools.convert.lightgbm.operator_converters.LightGbm import (
    convert_lightgbm,
)  # noqa

Let's register the converter

In [67]:
# Options the registered converter accepts, with their allowed values.
lgbm_converter_options = {"nocl": [False], "zipmap": [True, False, "columns"]}

# Register LGBMClassifier with skl2onnx: which shape calculator and which
# conversion function to use when this estimator appears in a pipeline.
update_registered_converter(
    model=lgb.LGBMClassifier,
    alias="GBMClassifier",
    shape_fct=calculate_linear_classifier_output_shapes,
    convert_fct=convert_lightgbm,
    options=lgbm_converter_options,
)

Finally let's convert Light GBM model to onnx format

In [68]:
# Declare the model input: a float tensor with a dynamic first dimension and
# one column per feature of the training matrix.
n_features = X_train.shape[1]
initial_types = [("input", FloatTensorType([None, n_features]))]

# Convert the fitted pipeline to an ONNX model.
model_onnx = convert_sklearn(
    pipe,
    name="pipeline_lightgbm",
    initial_types=initial_types,
    target_opset={"": 12, "ai.onnx.ml": 2},
)

# Serialise the ONNX model and write it to disk.
with open("lightgbm.eurusd.h1.onnx", "wb") as onnx_file:
    onnx_file.write(model_onnx.SerializeToString())

# Other Models

In [69]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

Initiate the models we are going to use

In [70]:
# Baseline classifiers to compare on the same train/test split, keyed by the
# display name printed with each report.
classifiers = {
    "Logistic Regression": LogisticRegression(solver="lbfgs"),
    # Seeded so the tree built is repeatable across runs; the seed matches the
    # one used for the train/test split.
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC()
}

Let's standardize the input data first

In [71]:
# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)

x_train = scaler.transform(X_train)
x_test = scaler.transform(X_test)

Run the same data across different models

In [None]:
# Train each baseline classifier on the scaled training data, then report its
# performance on the scaled test data.

for classifier_name, classifier in classifiers.items():

    # Train the classifier
    classifier.fit(x_train, y_train)

    # Predict class labels for the test split
    y_pred = classifier.predict(x_test)

    print(f"{classifier_name}\nClassification Report\n",classification_report(y_test, y_pred))

    # One figure per classifier; the title names the model so that the
    # confusion matrices can be told apart.
    fig, ax = plt.subplots()
    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt="d", ax=ax)  # 'd' formats entries as integers
    ax.set_title(f"Confusion Matrix - {classifier_name}")
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    plt.show()  # Display the heatmap
    plt.close(fig)  # Release the figure once it has been shown