In [1]:
!pip install xgboost scikit-learn joblib

Collecting xgboost
  Downloading xgboost-3.1.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-3.1.3-py3-none-win_amd64.whl (72.0 MB)
   ---------------------------------------- 0.0/72.0 MB ? eta -:--:--
   ---------------------------------------- 0.3/72.0 MB ? eta -:--:--
   ---------------------------------------- 0.5/72.0 MB 1.3 MB/s eta 0:00:56
   ---------------------------------------- 0.8/72.0 MB 1.3 MB/s eta 0:00:56
    --------------------------------------- 1.0/72.0 MB 1.2 MB/s eta 0:00:58
    --------------------------------------- 1.3/72.0 MB 1.3 MB/s eta 0:00:56
    --------------------------------------- 1.6/72.0 MB 1.3 MB/s eta 0:00:56
   - -------------------------------------- 1.8/72.0 MB 1.3 MB/s eta 0:00:56
   - -------------------------------------- 2.1/72.0 MB 1.3 MB/s eta 0:00:56
   - -------------------------------------- 2.4/72.0 MB 1.2 MB/s eta 0:00:57
   - -------------------------------------- 2.6/72.0 MB 1.3 MB/s eta 0:00:56
   - ---------------

In [2]:
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

from xgboost import XGBClassifier


In [3]:
# Load the three persisted artifacts from the working directory, in order:
# the X object, the y object, and the feature-name object.
X, y, feature_names = (
    joblib.load(artifact_file)
    for artifact_file in ("X_features.pkl", "y_labels.pkl", "feature_names.pkl")
)

# Quick sanity check on the dimensions of what was loaded.
print(X.shape, y.shape)


(1999, 1171) (1999,)


In [4]:
# Hold out 20% of the rows for validation. The split is seeded so it is
# repeatable, and stratified on y so both partitions keep the label balance.
split_options = {
    "test_size": 0.2,
    "random_state": 42,
    "stratify": y,
}
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_options)

# Report the resulting partition shapes.
for split_label, split_features in (
    ("Train size:", X_train),
    ("Validation size:", X_val),
):
    print(split_label, split_features.shape)


Train size: (1599, 1171)
Validation size: (400, 1171)


In [5]:
# Hyper-parameters for the gradient-boosted classifier, kept in one dict so
# they can be inspected or logged separately from the estimator itself.
xgb_params = {
    # Learning task and the metric XGBoost evaluates with.
    "objective": "binary:logistic",
    "eval_metric": "logloss",
    # Ensemble size, tree depth and shrinkage.
    "n_estimators": 300,
    "max_depth": 6,
    "learning_rate": 0.05,
    # Row / column subsampling ratios applied per tree.
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    # Fixed seed for repeatability; n_jobs=-1 requests all available cores.
    "random_state": 42,
    "n_jobs": -1,
}

xgb_model = XGBClassifier(**xgb_params)


In [6]:
# Fit the classifier on the training partition only; the validation
# partition is not passed to fit here.
xgb_model.fit(X=X_train, y=y_train)

training_done_message = "✅ Model training complete"
print(training_done_message)


✅ Model training complete


In [7]:
# Score the held-out validation partition.
# predict_proba returns one column per class; column 1 is taken as the
# score for the second class and is what ROC-AUC is computed from.
val_class_probabilities = xgb_model.predict_proba(X_val)
y_prob = val_class_probabilities[:, 1]
y_pred = xgb_model.predict(X_val)

# Accuracy uses the predicted labels, ROC-AUC uses the probability scores.
acc = accuracy_score(y_val, y_pred)
auc = roc_auc_score(y_val, y_prob)

# Print both metrics rounded to four decimals.
for metric_label, metric_value in (("Accuracy:", acc), ("ROC-AUC :", auc)):
    print(metric_label, round(metric_value, 4))


Accuracy: 0.86
ROC-AUC : 0.941


In [8]:
# Per-class precision / recall / F1 for the validation predictions.
validation_report = classification_report(y_val, y_pred)
print(validation_report)


              precision    recall  f1-score   support

         0.0       0.89      0.81      0.85       193
         1.0       0.84      0.90      0.87       207

    accuracy                           0.86       400
   macro avg       0.86      0.86      0.86       400
weighted avg       0.86      0.86      0.86       400



In [9]:
# Persist the fitted estimator with joblib to a file in the working directory.
model_output_file = "xgboost_placement_model.pkl"
joblib.dump(xgb_model, model_output_file)

save_done_message = "✅ XGBoost model saved"
print(save_done_message)


✅ XGBoost model saved
