<a href="https://colab.research.google.com/github/anhthu54/FaceRecognition/blob/main/TrainingModelCreditCard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
cd /content/drive/MyDrive/Colab Notebooks/Data

/content/drive/MyDrive/Colab Notebooks/Data


In [4]:
import pandas as pd
df = pd.read_csv('creditcard.csv')

In [5]:
X = df.drop('Class', axis=1)
y = df['Class']

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size = 0.3, random_state = 42)

In [7]:
#Feature Scaling
from sklearn.preprocessing import StandardScaler,RobustScaler
standard_scaler = StandardScaler()
robust_scaler = RobustScaler()
X_train['Time'] = standard_scaler.fit_transform(X_train['Time'].values.reshape(-1,1))
X_train['Amount'] = robust_scaler.fit_transform(X_train['Amount'].values.reshape(-1,1))

X_test['Time'] = standard_scaler.transform(X_test['Time'].values.reshape(-1,1))
X_test['Amount'] = robust_scaler.transform(X_test['Amount'].values.reshape(-1,1))

# Khi không dùng các phương thức resampling

In [8]:
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, recall_score, precision_score
from sklearn.model_selection import cross_val_score, StratifiedKFold
import time
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from imblearn.under_sampling import TomekLinks, RandomUnderSampler, NearMiss
from imblearn.over_sampling import SMOTE, RandomOverSampler, ADASYN
from imblearn.pipeline import Pipeline

In [None]:
classifiers = {
    "Logistic Regression": LogisticRegression(C=1.0, max_iter=1000, solver='liblinear'),
    "Decision Tree": DecisionTreeClassifier(max_depth=None, min_samples_split=2),
    "Random Forest Classifier": RandomForestClassifier(n_estimators=100, max_features='sqrt', max_depth=None, min_samples_split=2, n_jobs=-1),
    "XGBoost": xgb.XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, eval_metric='logloss'), # Removed unused parameter
    # "Support Vector Classifier": SVC(C=1.0, kernel='rbf', probability=True)
}

def train_and_evaluate_models(X_train, y_train, X_test, y_test, classifiers, cv_folds=5):
    # Khởi tạo cross-validation
    cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=42)

    # Huấn luyện và đánh giá từng mô hình
    for name, clf in classifiers.items():
        # Cross-validation với các chỉ số khác nhau
        cv_accuracy = cross_val_score(clf, X_train, y_train, cv=cv, scoring='accuracy')
        cv_precision = cross_val_score(clf, X_train, y_train, cv=cv, scoring='precision')
        cv_roc_auc = cross_val_score(clf, X_train, y_train, cv=cv, scoring='roc_auc')
        cv_f1 = cross_val_score(clf, X_train, y_train, cv=cv, scoring='f1')
        cv_recall = cross_val_score(clf, X_train, y_train, cv=cv, scoring='recall')

        start_time = time.time()
        # Huấn luyện trên toàn bộ tập huấn luyện
        clf.fit(X_train, y_train)
        end_time = time.time()
        training_time = end_time - start_time
        # Dự đoán trên tập kiểm tra
        y_pred = clf.predict(X_test)
        y_pred_proba = clf.predict_proba(X_test)[:, 1]

        # Đánh giá trên tập kiểm tra
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_pred_proba)
        f1 = f1_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)

        # In kết quả
        print(f"Model: {name}")
        print(f"Cross-Validation Accuracy: {cv_accuracy.mean():.4f}")
        print(f"Cross-Validation Precision: {cv_precision.mean():.4f}")
        print(f"Cross-Validation ROC AUC: {cv_roc_auc.mean():.4f}")
        print(f"Cross-Validation F1 Score: {cv_f1.mean():.4f}")
        print(f"Cross-Validation Recall: {cv_recall.mean():.4f}")
        print(f"Training Time: {training_time:.4f} seconds")
        print(f"Test Set Accuracy: {accuracy:.4f}")
        print(f"Test Set ROC AUC: {roc_auc:.4f}")
        print(f"Test Set F1 Score: {f1:.4f}")
        print(f"Test Set Recall: {recall:.4f}")
        print("-" * 30)

train_and_evaluate_models(X_train, y_train, X_test, y_test, classifiers)

Model: Logistic Regression
Cross-Validation Accuracy: 0.9992
Cross-Validation Precision: 0.8690
Cross-Validation ROC AUC: 0.9812
Cross-Validation F1 Score: 0.7327
Cross-Validation Recall: 0.6337
Training Time: 6.2805 seconds
Test Set Accuracy: 0.9992
Test Set ROC AUC: 0.9545
Test Set F1 Score: 0.7216
Test Set Recall: 0.6216
------------------------------
Model: Decision Tree
Cross-Validation Accuracy: 0.9991
Cross-Validation Precision: 0.7164
Cross-Validation ROC AUC: 0.8688
Cross-Validation F1 Score: 0.7221
Cross-Validation Recall: 0.7469
Training Time: 31.7997 seconds
Test Set Accuracy: 0.9991
Test Set ROC AUC: 0.8410
Test Set F1 Score: 0.7214
Test Set Recall: 0.6824
------------------------------
Model: Random Forest Classifier
Cross-Validation Accuracy: 0.9995
Cross-Validation Precision: 0.9292
Cross-Validation ROC AUC: 0.9490
Cross-Validation F1 Score: 0.8510
Cross-Validation Recall: 0.7848
Training Time: 252.7950 seconds
Test Set Accuracy: 0.9995
Test Set ROC AUC: 0.9307
Test Set


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "use_label_encoder" } are not used.


    E.g. tree_method = "hist", device = "cuda"

Parameters: { "use_label_encoder" } are not used.



ValueError: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/sklearn.py", line 1531, in fit
    self._Booster = train(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/training.py", line 181, in train
    bst.update(dtrain, iteration=i, fobj=obj)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 2100, in update
    _check_call(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 284, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [18:59:26] /workspace/src/tree/updater_gpu_hist.cu:861: Exception in gpu_hist: [18:59:26] /workspace/src/tree/updater_gpu_hist.cu:867: Check failed: ctx_->Ordinal() >= 0 (-1 vs. 0) : Must have at least one device
Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2b4c3) [0x7e607e52b4c3]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2bf84) [0x7e607e52bf84]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (6) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (8) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]



Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2c154) [0x7e607e52c154]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (6) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]
  [bt] (8) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xa3e9) [0x7e60e8ad93e9]



--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/sklearn.py", line 1531, in fit
    self._Booster = train(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/training.py", line 181, in train
    bst.update(dtrain, iteration=i, fobj=obj)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 2100, in update
    _check_call(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 284, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [18:59:27] /workspace/src/tree/updater_gpu_hist.cu:861: Exception in gpu_hist: [18:59:27] /workspace/src/tree/updater_gpu_hist.cu:867: Check failed: ctx_->Ordinal() >= 0 (-1 vs. 0) : Must have at least one device
Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2b4c3) [0x7e607e52b4c3]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2bf84) [0x7e607e52bf84]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (6) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (8) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]



Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2c154) [0x7e607e52c154]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (6) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]
  [bt] (8) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xa3e9) [0x7e60e8ad93e9]



--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/sklearn.py", line 1531, in fit
    self._Booster = train(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/training.py", line 181, in train
    bst.update(dtrain, iteration=i, fobj=obj)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 2100, in update
    _check_call(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 284, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [18:59:29] /workspace/src/tree/updater_gpu_hist.cu:861: Exception in gpu_hist: [18:59:29] /workspace/src/tree/updater_gpu_hist.cu:867: Check failed: ctx_->Ordinal() >= 0 (-1 vs. 0) : Must have at least one device
Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2b4c3) [0x7e607e52b4c3]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2bf84) [0x7e607e52bf84]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (6) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (8) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]



Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2c154) [0x7e607e52c154]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (6) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]
  [bt] (8) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xa3e9) [0x7e60e8ad93e9]



--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/sklearn.py", line 1531, in fit
    self._Booster = train(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/training.py", line 181, in train
    bst.update(dtrain, iteration=i, fobj=obj)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 2100, in update
    _check_call(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 284, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [18:59:30] /workspace/src/tree/updater_gpu_hist.cu:861: Exception in gpu_hist: [18:59:30] /workspace/src/tree/updater_gpu_hist.cu:867: Check failed: ctx_->Ordinal() >= 0 (-1 vs. 0) : Must have at least one device
Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2b4c3) [0x7e607e52b4c3]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2bf84) [0x7e607e52bf84]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (6) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (8) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]



Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2c154) [0x7e607e52c154]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (6) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]
  [bt] (8) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xa3e9) [0x7e60e8ad93e9]



--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/sklearn.py", line 1531, in fit
    self._Booster = train(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 726, in inner_f
    return func(**kwargs)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/training.py", line 181, in train
    bst.update(dtrain, iteration=i, fobj=obj)
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 2100, in update
    _check_call(
  File "/usr/local/lib/python3.10/dist-packages/xgboost/core.py", line 284, in _check_call
    raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [18:59:32] /workspace/src/tree/updater_gpu_hist.cu:861: Exception in gpu_hist: [18:59:32] /workspace/src/tree/updater_gpu_hist.cu:867: Check failed: ctx_->Ordinal() >= 0 (-1 vs. 0) : Must have at least one device
Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2b4c3) [0x7e607e52b4c3]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2bf84) [0x7e607e52bf84]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (6) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (8) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]



Stack trace:
  [bt] (0) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x22dbbc) [0x7e607da2dbbc]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0xd2c154) [0x7e607e52c154]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x57f196) [0x7e607dd7f196]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x580564) [0x7e607dd80564]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(+0x5cae68) [0x7e607ddcae68]
  [bt] (5) /usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x6f) [0x7e607d93742f]
  [bt] (6) /lib/x86_64-linux-gnu/libffi.so.8(+0x7e2e) [0x7e60e8ab3e2e]
  [bt] (7) /lib/x86_64-linux-gnu/libffi.so.8(+0x4493) [0x7e60e8ab0493]
  [bt] (8) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0xa3e9) [0x7e60e8ad93e9]




In [None]:
new_classifiers = {
      "XGBoost": xgb.XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, eval_metric='logloss'), # Removed unused parameter
}
train_and_evaluate_models(X_train, y_train, X_test, y_test, new_classifiers)

Model: XGBoost
Cross-Validation Accuracy: 0.9996
Cross-Validation Precision: 0.9459
Cross-Validation ROC AUC: 0.9765
Cross-Validation F1 Score: 0.8678
Cross-Validation Recall: 0.8023
Training Time: 3.5510 seconds
Test Set Accuracy: 0.9995
Test Set ROC AUC: 0.9673
Test Set F1 Score: 0.8485
Test Set Recall: 0.7568
------------------------------


#Dùng các phương thức resampling

In [9]:
def train_and_evaluate_resampling_models(X_train, y_train, X_test, y_test, classifiers, resampling_methods, cv_folds=5):
    # Khởi tạo cross-validation
    cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=42)

    for resampling_name, resampler in resampling_methods.items():
        print(f"Resampling method: {resampling_name}")

        # Huấn luyện và đánh giá từng mô hình
        for name, clf in classifiers.items():
            # Tạo pipeline với resampling và classifier
            pipeline = Pipeline([
                ('resampling', resampler),
                ('classification', clf)
            ])

            # Cross-validation với các chỉ số khác nhau
            cv_accuracy = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='accuracy')
            cv_precision = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='precision')
            cv_roc_auc = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='roc_auc')
            cv_f1 = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='f1')
            cv_recall = cross_val_score(pipeline, X_train, y_train, cv=cv, scoring='recall')

            start_time = time.time()
            # Huấn luyện trên toàn bộ tập huấn luyện với resampling
            pipeline.fit(X_train, y_train)
            end_time = time.time()
            training_time = end_time - start_time

            # Dự đoán trên tập kiểm tra
            y_pred = pipeline.predict(X_test)
            y_pred_proba = pipeline.predict_proba(X_test)[:, 1]

            # Đánh giá trên tập kiểm tra
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred)
            roc_auc = roc_auc_score(y_test, y_pred_proba)
            f1 = f1_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)

            # In kết quả
            print(f"Model: {name}")
            print(f"Cross-Validation Accuracy: {cv_accuracy.mean():.4f}")
            print(f"Cross-Validation Precision: {cv_precision.mean():.4f}")
            print(f"Cross-Validation ROC AUC: {cv_roc_auc.mean():.4f}")
            print(f"Cross-Validation F1 Score: {cv_f1.mean():.4f}")
            print(f"Cross-Validation Recall: {cv_recall.mean():.4f}")
            print(f"Training Time: {training_time:.4f} seconds")
            print(f"Test Set Accuracy: {accuracy:.4f}")
            print(f"Test Set ROC AUC: {roc_auc:.4f}")
            print(f"Test Set F1 Score: {f1:.4f}")
            print(f"Test Set Recall: {recall:.4f}")
            print("-" * 30)
        print("=" * 60)


In [None]:

classifiers = {
    "Logistic Regression": LogisticRegression(C=1.0, max_iter=1000, solver='liblinear'),
    "Decision Tree": DecisionTreeClassifier(max_depth=None, min_samples_split=2),
    "XGBoost": xgb.XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, eval_metric='logloss'), # Removed unused parameter
    "Random Forest Classifier": RandomForestClassifier(n_estimators=100, max_features='sqrt', max_depth=None, min_samples_split=2, n_jobs=-1),
    # "Support Vector Classifier": SVC(C=1.0, kernel='rbf', probability=True)
}
resampling_methods = {
    'Tomek Links': TomekLinks(sampling_strategy='auto'),
    'Random Undersampling': RandomUnderSampler(sampling_strategy=0.5),
    'Near Miss': NearMiss(sampling_strategy=0.5),
    'SMOTE': SMOTE(sampling_strategy=0.5),
    'Random Oversampling': RandomOverSampler(sampling_strategy=0.5),
    'ADASYN': ADASYN(sampling_strategy=0.5)
}
train_and_evaluate_resampling_models(X_train, y_train, X_test, y_test, classifiers, resampling_methods)

Resampling method: Tomek Links


In [11]:
#continue
continue_classifier = {
    "XGBoost": xgb.XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, eval_metric='logloss'), # Removed unused parameter
    "Random Forest Classifier": RandomForestClassifier(n_estimators=100, max_features='sqrt', max_depth=None, min_samples_split=2, n_jobs=-1),
}
continue_resampling_methods = {
      'SMOTE': SMOTE(sampling_strategy=0.5),
}
train_and_evaluate_resampling_models(X_train, y_train, X_test, y_test, continue_classifier, continue_resampling_methods)



Resampling method: SMOTE
Model: XGBoost
Cross-Validation Accuracy: 0.9986
Cross-Validation Precision: 0.5596
Cross-Validation ROC AUC: 0.9758
Cross-Validation F1 Score: 0.6700
Cross-Validation Recall: 0.8459
Training Time: 5.9038 seconds
Test Set Accuracy: 0.9982
Test Set ROC AUC: 0.9714
Test Set F1 Score: 0.6165
Test Set Recall: 0.8311
------------------------------
Model: Random Forest Classifier
Cross-Validation Accuracy: 0.9995
Cross-Validation Precision: 0.8907
Cross-Validation ROC AUC: 0.9701
Cross-Validation F1 Score: 0.8576
Cross-Validation Recall: 0.8314
Training Time: 275.9082 seconds
Test Set Accuracy: 0.9995
Test Set ROC AUC: 0.9654
Test Set F1 Score: 0.8399
Test Set Recall: 0.7973
------------------------------


In [11]:
classifiers = {
    "Logistic Regression": LogisticRegression(C=1.0, max_iter=1000, solver='liblinear'),
    "Decision Tree": DecisionTreeClassifier(max_depth=None, min_samples_split=2),
    "XGBoost": xgb.XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, eval_metric='logloss'), # Removed unused parameter
    "Random Forest Classifier": RandomForestClassifier(n_estimators=100, max_features='sqrt', max_depth=None, min_samples_split=2, n_jobs=-1),
    # "Support Vector Classifier": SVC(C=1.0, kernel='rbf', probability=True)
}
next_resampling = {
    'Random Oversampling': RandomOverSampler(sampling_strategy=0.5),
    'ADASYN': ADASYN(sampling_strategy=0.5),
}
train_and_evaluate_resampling_models(X_train, y_train, X_test, y_test, classifiers, next_resampling)

Resampling method: Random Oversampling
Model: Logistic Regression
Cross-Validation Accuracy: 0.9891
Cross-Validation Precision: 0.1278
Cross-Validation ROC AUC: 0.9826
Cross-Validation F1 Score: 0.2229
Cross-Validation Recall: 0.9098
Training Time: 5.6352 seconds
Test Set Accuracy: 0.9893
Test Set ROC AUC: 0.9676
Test Set F1 Score: 0.2186
Test Set Recall: 0.8649
------------------------------
Model: Decision Tree
Cross-Validation Accuracy: 0.9991
Cross-Validation Precision: 0.7757
Cross-Validation ROC AUC: 0.8631
Cross-Validation F1 Score: 0.7546
Cross-Validation Recall: 0.7413
Training Time: 21.2695 seconds
Test Set Accuracy: 0.9991
Test Set ROC AUC: 0.8478
Test Set F1 Score: 0.7357
Test Set Recall: 0.6959
------------------------------
Model: XGBoost
Cross-Validation Accuracy: 0.9994
Cross-Validation Precision: 0.8476
Cross-Validation ROC AUC: 0.9727
Cross-Validation F1 Score: 0.8325
Cross-Validation Recall: 0.8197
Training Time: 7.6897 seconds
Test Set Accuracy: 0.9993
Test Set ROC 

**Class weight**