In [1]:
import numpy as np
import pandas as pd
import os

from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, accuracy_score, roc_curve, roc_auc_score, auc, precision_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder, label_binarize, StandardScaler

!pip install xgboost
!pip install imbalance-xgboost



In [2]:
data = pd.read_csv("car-vgood.csv")
data

Unnamed: 0,Buying,Maint,Doors,Persons,Lug_boot,Safety,class
0,vhigh,vhigh,2,2,small,low,negative
1,vhigh,vhigh,2,2,small,med,negative
2,vhigh,vhigh,2,2,small,high,negative
3,vhigh,vhigh,2,2,med,low,negative
4,vhigh,vhigh,2,2,med,med,negative
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,negative
1724,low,low,5more,more,med,high,positive
1725,low,low,5more,more,big,low,negative
1726,low,low,5more,more,big,med,negative


In [3]:
for i in data.columns:
    data[i] = LabelEncoder().fit_transform(data[i])

In [4]:
y = data['class']
del data['class']
X = data
del data

In [5]:
from xgboost import XGBClassifier

acc_scores_xgb = []
roc_auc_scores_xgb = []
precision_scores_xgb = []
recall_scores_xgb = []
f1_scores_xgb = []

# Define the K-fold Cross Validator
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

# K-fold Cross Validation model evaluation
fold_no = 1
for train, test in kfold.split(X, y):
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
    
    xgbc = XGBClassifier()

    xgbc.fit(X.iloc[train], y.iloc[train])
    y_pred = xgbc.predict(X.iloc[test])

    acc_scores_xgb.append(accuracy_score(y.iloc[test], y_pred))
    roc_auc_scores_xgb.append(roc_auc_score(y.iloc[test], y_pred))
    precision_scores_xgb.append(precision_score(y.iloc[test], y_pred))
    recall_scores_xgb.append(recall_score(y.iloc[test], y_pred))
    f1_scores_xgb.append(f1_score(y.iloc[test], y_pred))
    
    # Increase fold number
    fold_no = fold_no + 1

------------------------------------------------------------------------
Training for fold 1 ...
------------------------------------------------------------------------
Training for fold 2 ...
------------------------------------------------------------------------
Training for fold 3 ...
------------------------------------------------------------------------
Training for fold 4 ...
------------------------------------------------------------------------
Training for fold 5 ...
------------------------------------------------------------------------
Training for fold 6 ...
------------------------------------------------------------------------
Training for fold 7 ...
------------------------------------------------------------------------
Training for fold 8 ...
------------------------------------------------------------------------
Training for fold 9 ...
------------------------------------------------------------------------
Training for fold 10 ...


In [6]:
print(f"Accuracy Mean: {np.mean(acc_scores_xgb)}")
print(f"Accuracy Standard Deviation: {np.std(acc_scores_xgb)}")
print("\n")
print(f"ROC-AUC Mean: {np.mean(roc_auc_scores_xgb)}")
print(f"ROC-AUC Standard Deviation: {np.std(roc_auc_scores_xgb)}")
print("\n")
print(f"Precision Mean: {np.mean(precision_scores_xgb)}")
print(f"Precision Standard Deviation: {np.std(precision_scores_xgb)}")
print("\n")
print(f"Recall Mean: {np.mean(recall_scores_xgb)}")
print(f"Recall Standard Deviation: {np.std(recall_scores_xgb)}")
print("\n")
print(f"F1 Mean: {np.mean(f1_scores_xgb)}")
print(f"F1 Standard Deviation: {np.std(f1_scores_xgb)}")

Accuracy Mean: 0.9976844619765665
Accuracy Standard Deviation: 0.0038475047619118993


ROC-AUC Mean: 0.9690476190476192
ROC-AUC Standard Deviation: 0.05329292687047475


Precision Mean: 1.0
Precision Standard Deviation: 0.0


Recall Mean: 0.9380952380952381
Recall Standard Deviation: 0.1065858537409495


F1 Mean: 0.9646153846153845
F1 Standard Deviation: 0.0627005657609687


In [7]:
from imxgboost.imbalance_xgb import imbalance_xgboost as ixgb

acc_scores_ixgb = []
roc_auc_scores_ixgb = []
precision_scores_ixgb = []
recall_scores_ixgb = []
f1_scores_ixgb = []

# Define the K-fold Cross Validator
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

# K-fold Cross Validation model evaluation
fold_no = 1
for train, test in kfold.split(X, y):
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

    ixgbc = ixgb()
    ixgbc.fit(X.iloc[train].to_numpy(), y.iloc[train].to_numpy())
    y_pred = ixgbc.predict_determine(X.iloc[test].to_numpy(), y=None)

    acc_scores_ixgb.append(accuracy_score(y.iloc[test], y_pred))
    roc_auc_scores_ixgb.append(roc_auc_score(y.iloc[test], y_pred))
    precision_scores_ixgb.append(precision_score(y.iloc[test], y_pred))
    recall_scores_ixgb.append(recall_score(y.iloc[test], y_pred))
    f1_scores_ixgb.append(f1_score(y.iloc[test], y_pred))
    
    # Increase fold number
    fold_no = fold_no + 1

------------------------------------------------------------------------
Training for fold 1 ...
------------------------------------------------------------------------
Training for fold 2 ...
------------------------------------------------------------------------
Training for fold 3 ...
------------------------------------------------------------------------
Training for fold 4 ...
------------------------------------------------------------------------
Training for fold 5 ...
------------------------------------------------------------------------
Training for fold 6 ...
------------------------------------------------------------------------
Training for fold 7 ...
------------------------------------------------------------------------
Training for fold 8 ...
------------------------------------------------------------------------
Training for fold 9 ...
------------------------------------------------------------------------
Training for fold 10 ...


In [8]:
print(f"Accuracy Mean: {np.mean(acc_scores_ixgb)}")
print(f"Accuracy Standard Deviation: {np.std(acc_scores_ixgb)}")
print("\n")
print(f"ROC-AUC Mean: {np.mean(roc_auc_scores_ixgb)}")
print(f"ROC-AUC Standard Deviation: {np.std(roc_auc_scores_ixgb)}")
print("\n")
print(f"Precision Mean: {np.mean(precision_scores_ixgb)}")
print(f"Precision Standard Deviation: {np.std(precision_scores_ixgb)}")
print("\n")
print(f"Recall Mean: {np.mean(recall_scores_ixgb)}")
print(f"Recall Standard Deviation: {np.std(recall_scores_ixgb)}")
print("\n")
print(f"F1 Mean: {np.mean(f1_scores_ixgb)}")
print(f"F1 Standard Deviation: {np.std(f1_scores_ixgb)}")

Accuracy Mean: 0.9942027725887023
Accuracy Standard Deviation: 0.0068792153317843475


ROC-AUC Mean: 0.9660517175631526
ROC-AUC Standard Deviation: 0.05513442878954677


Precision Mean: 0.9166666666666666
Precision Standard Deviation: 0.11636866703140786


Recall Mean: 0.9357142857142857
Recall Standard Deviation: 0.1086142355043528


F1 Mean: 0.9222644022644022
F1 Standard Deviation: 0.09703687664255813
