In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the combined dataset from the project's data directory.
df = pd.read_csv("../data/combined_data.csv")

# Target is the macro_stress column; the features are every remaining column
# except the Ticker / Date columns and the target itself.
y = df["macro_stress"]
X = df.drop(["Ticker", "Date", "macro_stress"], axis=1)

# Hold out 20% of the rows for testing, stratified on the target so both
# partitions keep the same class proportions. The seed is fixed for
# reproducibility.
# NOTE(review): rows are assigned at random; if several rows share a Date
# (and hence possibly the same label), information may leak between train and
# test -- confirm this is acceptable for the evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:
# Class-imbalance weight for XGBoost: (# negative samples) / (# positive samples).
#
# Count by explicit label instead of unpacking value_counts(): value_counts()
# orders its result by frequency, so positional unpacking would silently swap
# the two counts whenever label 1 is the majority class (or the counts tie),
# and would fail outright if the target does not have exactly two classes.
pos = int((y_train == 1).sum())
neg = int((y_train == 0).sum())

if pos == 0:
    # Without positives the ratio is undefined and a classifier cannot be fit.
    raise ValueError(
        "y_train contains no positive (label 1) samples; "
        "cannot compute scale_pos_weight"
    )

scale_pos = neg / pos

In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

# Base estimator. Hyperparameters listed in param_grid are set per candidate
# by the search; only the settings below stay fixed.
# NOTE(review): use_label_encoder is deprecated in newer xgboost releases --
# confirm against the installed version before removing it.
xgb = XGBClassifier(
    random_state=42,
    eval_metric="logloss",
    use_label_encoder=False,
)

# Search space: 3 * 3 * 2 * 2 * 2 = 72 candidate combinations
# (colsample_bytree, scale_pos_weight and gamma are each pinned to one value).
param_grid = dict(
    n_estimators=[175, 200, 225],
    max_depth=[2, 3, 4],
    learning_rate=[0.03, 0.04],
    subsample=[0.85, 0.9],
    colsample_bytree=[0.75],
    scale_pos_weight=[scale_pos],
    min_child_weight=[1, 2],
    gamma=[0.1],
)

# Exhaustive search with 5-fold cross-validation, ranked by F1 score,
# using all available cores.
grid = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    scoring="f1",
    cv=5,
    n_jobs=-1,
)


In [None]:
# Run the grid search on the training partition (fit returns the search
# object itself) and keep the estimator associated with the best F1 score.
best_model = grid.fit(X_train, y_train).best_estimator_

# Report the full parameter set of the selected estimator.
print(
    "Best model parameters:",
    best_model.get_params(),
)


In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

# Score the held-out test partition: second column of predict_proba (used for
# ROC-AUC) and hard class predictions (used for the other metrics).
y_prob = best_model.predict_proba(X_test)[:, 1]
y_pred = best_model.predict(X_test)

# Scalar metrics first, then the per-class precision / recall / F1 table.
for metric_name, metric_value in (
    ("Accuracy:", accuracy_score(y_test, y_pred)),
    ("ROC-AUC:", roc_auc_score(y_test, y_prob)),
):
    print(metric_name, metric_value)
print(classification_report(y_test, y_pred))


In [None]:
import joblib

import os

# Persist the tuned estimator to disk. Create the target directory first so
# the dump does not fail with FileNotFoundError when the models directory
# does not exist yet (e.g. on a fresh checkout).
model_path = "../models/cris_model_78.pkl"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(best_model, model_path)
