In [1]:
pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.0-py3-none-win_amd64.whl (150.0 MB)
   ---------------------------------------- 0.0/150.0 MB ? eta -:--:--
   ---------------------------------------- 1.6/150.0 MB 8.8 MB/s eta 0:00:17
    --------------------------------------- 3.7/150.0 MB 9.4 MB/s eta 0:00:16
   - -------------------------------------- 5.8/150.0 MB 10.0 MB/s eta 0:00:15
   -- ------------------------------------- 7.9/150.0 MB 10.0 MB/s eta 0:00:15
   -- ------------------------------------- 10.2/150.0 MB 10.3 MB/s eta 0:00:14
   --- ------------------------------------ 12.6/150.0 MB 10.5 MB/s eta 0:00:14
   --- ------------------------------------ 14.9/150.0 MB 10.5 MB/s eta 0:00:13
   ---- ----------------------------------- 17.0/150.0 MB 10.7 MB/s eta 0:00:13
   ----- ---------------------------------- 19.7/150.0 MB 10.9 MB/s eta 0:00:12
   ------ --------------------------------- 22.5/150.0 MB 11.1 MB/s

In [2]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report

In [4]:
import pandas as pd

# Load preprocessed data
X_train = pd.read_csv("../data/X_train.csv")
X_test = pd.read_csv("../data/X_test.csv")
y_train = pd.read_csv("../data/y_train.csv")
y_test = pd.read_csv("../data/y_test.csv")

In [5]:
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [6]:

# 📌 Step 2: Define model parameters
params = {
    'objective': 'binary:logistic',  # Binary classification
    'eval_metric': 'logloss',        # Logarithmic loss
    'max_depth': 6,                   # Depth of trees
    'learning_rate': 0.1,              # Step size shrinkage
    'n_estimators': 100,               # Number of trees
    'random_state': 42
}


In [7]:
# 📌 Step 3: Train the model
model = xgb.train(params, dtrain, num_boost_round=100)

# 📌 Step 4: Make predictions
y_pred_prob = model.predict(dtest)
y_pred = [1 if prob > 0.5 else 0 for prob in y_pred_prob]  # Convert probabilities to binary

Parameters: { "n_estimators" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [8]:
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

In [9]:

print(f"✅ XGBoost Model Accuracy: {accuracy * 100:.2f}%")
print("\n📊 Classification Report:\n", report)

✅ XGBoost Model Accuracy: 94.14%

📊 Classification Report:
               precision    recall  f1-score   support

         0.0       0.95      0.99      0.97    278585
         1.0       0.83      0.53      0.65     31384

    accuracy                           0.94    309969
   macro avg       0.89      0.76      0.81    309969
weighted avg       0.94      0.94      0.94    309969



In [11]:
import pickle

# Save trained XGBoost model
with open("../flask-api/models/xgboost_model.pkl", "wb") as f:
    pickle.dump(model, f)