Setup and Path Definition

In [3]:
import os
import pandas as pd
import joblib
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Resolve the project root relative to the kernel's current working directory.
# The notebook is expected to sit two directory levels below the root
# (for example in 'notebooks/04_ml_research/'); append one more os.pardir
# for every additional level of nesting.
PROJECT_ROOT = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir)
)

Load Data

In [4]:
# All locations are anchored at PROJECT_ROOT so the notebook does not depend
# on machine-specific absolute paths.

# Input: the parquet file holding the engineered features.
feature_file = os.path.join(PROJECT_ROOT, "data/features/ml_feature_data.parquet")

# Output: the directory and file the trained model is serialized to later on.
model_output_dir = os.path.join(PROJECT_ROOT, "qmind_quant/ml_models/models")
model_output_path = os.path.join(
    PROJECT_ROOT, "qmind_quant/ml_models/models", "xgboost_v1.joblib"
)

# Read the feature table into memory; every later cell works from `df`.
df = pd.read_parquet(path=feature_file)

Define Features (X) and Target (y)

In [5]:
# Columns fed to the model as inputs. Going by their names these look like
# trailing returns, a volatility estimate and an RSI reading — presumably
# computed by the upstream feature pipeline; confirm their exact definitions there.
features = ['returns_1d', 'returns_5d', 'returns_21d', 'volatility_21d', 'rsi_14d']

# X: the selected feature columns; y: the label column the classifier is fit against.
X, y = df[features], df['target']

Split Data into Training and Testing Sets

In [6]:
# Ordered (non-shuffled) hold-out split: the first TRAIN_FRACTION of the rows
# is used for training and the remaining rows for testing.
# NOTE(review): this is only a leak-free time-series split if `df` is sorted
# chronologically — the row order is not visible from this notebook; confirm.
TRAIN_FRACTION = 0.8
train_size = int(TRAIN_FRACTION * len(X))

# Fail early with a clear message instead of letting the model fit or the
# evaluation break later on an empty split.
if train_size == 0 or train_size == len(X):
    raise ValueError(
        f"Cannot split {len(X)} rows into non-empty train/test sets "
        f"with TRAIN_FRACTION={TRAIN_FRACTION}."
    )

# .iloc makes the slicing explicitly positional; plain `obj[:n]` can be
# interpreted as label-based depending on the index dtype and pandas version.
X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

# Features and labels must stay row-aligned after the split.
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)

print(f"Training set size: {len(X_train)}")
print(f"Testing set size: {len(X_test)}")

Training set size: 448
Testing set size: 112


Train the XGBoost Model

In [7]:
# Initialize and train the XGBoost classifier.
# XGBoost has many parameters; this is a small, conservative starting
# configuration (shallow trees, modest learning rate) rather than a tuned one.
#
# `use_label_encoder` is intentionally not passed: the installed XGBoost does
# not recognize it and emits a 'Parameters: { "use_label_encoder" } are not
# used.' warning when it is supplied.
model = xgb.XGBClassifier(
    objective='binary:logistic',  # binary classification with probability output
    n_estimators=100,             # number of boosting rounds
    learning_rate=0.1,
    max_depth=3,
    eval_metric='logloss',
    random_state=42               # fixed seed so re-runs are reproducible
)

print("Training the XGBoost model...")
model.fit(X_train, y_train)
print("Model training complete.")

Training the XGBoost model...
Model training complete.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Evaluate the Model

In [8]:
# Score the held-out rows with the fitted classifier.
y_pred = model.predict(X_test)

# Overall fraction of test rows predicted correctly.
# NOTE(review): in the saved output the test set holds 72 'Up' vs 40 'Down'
# rows, so always predicting 'Up' would already score about 0.64 — read the
# accuracy against that majority-class baseline, not against 0.5.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Model Accuracy on Test Set: {:.4f}\n".format(accuracy))

# Per-class precision / recall / F1.
# target_names are applied in sorted label order, so this presumes the target
# is encoded as 0 = Down, 1 = Up — TODO confirm against the labelling step.
print("Classification Report:")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred,
        target_names=['Down', 'Up'],
    )
)

Model Accuracy on Test Set: 0.5804

Classification Report:
              precision    recall  f1-score   support

        Down       0.39      0.33      0.36        40
          Up       0.66      0.72      0.69        72

    accuracy                           0.58       112
   macro avg       0.53      0.52      0.52       112
weighted avg       0.56      0.58      0.57       112



Save the Trained Model

In [9]:
# Make sure the destination directory exists (a no-op when it already does).
os.makedirs(name=model_output_dir, exist_ok=True)

# Persist the fitted classifier with joblib.
# NOTE(review): joblib/pickle artifacts are tied to the library versions that
# wrote them; XGBoost's own save_model() may be a more portable format — check
# what the consumers of this file expect before changing it.
joblib.dump(value=model, filename=model_output_path)
print("Model saved to: {}".format(model_output_path))

Model saved to: /Users/enisyasaroglu/qmind_quant_platform/qmind_quant/ml_models/models/xgboost_v1.joblib
