In [8]:
from collections import Counter

import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import r2_score, accuracy_score, auc, roc_curve, confusion_matrix, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier



In [9]:
# Baseline: XGBoost with library-default hyperparameters, trained on the
# original (un-resampled) training split and scored on the held-out test split.
xgb = XGBClassifier()
xgb.fit(x_train, y_train)
predicted = xgb.predict(x_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy happens to
# be symmetric, but keeping the canonical order matches the other cells and
# stays correct if this line is adapted to an asymmetric metric.
print("Accuracy Using XGBoost is {} %".format(accuracy_score(y_test, predicted)*100))
print(classification_report(y_test, predicted))

Accuracy Using XGBoost is 85.91362126245848 %
              precision    recall  f1-score   support

         0.0       0.89      0.94      0.91     10663
         1.0       0.72      0.56      0.63      2882

    accuracy                           0.86     13545
   macro avg       0.80      0.75      0.77     13545
weighted avg       0.85      0.86      0.85     13545



In [7]:
# Hyperparameter grid explored by the search (3 x 3 x 3 = 27 combinations,
# each evaluated with 5-fold cross-validation on the training split).
param_grid = {
    'n_estimators': [100, 200, 300],  # Number of boosting rounds (trees)
    'max_depth': [3, 5, 7],  # Maximum depth of each tree
    'learning_rate': [0.01, 0.1, 0.3]  # Learning rate (shrinkage per round)
}

# Perform GridSearchCV for hyperparameter tuning.
# A fresh default estimator is used instead of the global `xgb`: that name is
# rebound in other cells with different hyperparameters, so reusing it made the
# search's starting point depend on the order in which cells were executed.
# NOTE(review): the classification reports in this notebook show roughly 79% of
# test rows in class 0, so plain accuracy may favour the majority class;
# consider scoring='f1' or 'balanced_accuracy' if minority recall matters.
grid_search = GridSearchCV(estimator=XGBClassifier(), param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(x_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Use the best model (refit on the full training split) to make predictions
best_xgb = grid_search.best_estimator_
predicted = best_xgb.predict(x_test)

# Calculate accuracy; arguments follow scikit-learn's (y_true, y_pred) order
accuracy = accuracy_score(y_test, predicted)
print("Accuracy using XGBoost after hyperparameter tuning:", accuracy * 100, "%")
print(classification_report(y_test, predicted))

Best Hyperparameters: {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 300}
Accuracy using XGBoost after hyperparameter tuning: 86.37873754152824 %


In [10]:
# Oversample with SMOTE (fixed seed). Only the training split is resampled;
# x_test / y_test are used as-is for evaluation below.
smote = SMOTE(random_state=42)
x_train_smote, y_train_smote = smote.fit_resample(x_train, y_train)

# Hyperparameters equal to the "Best Hyperparameters" line printed by the
# grid-search cell of this notebook.
tuned_params = {
    'learning_rate': 0.1,
    'max_depth': 7,
    'n_estimators': 300,
}

# Build the classifier and train it on the SMOTE-resampled data in one step
# (fit returns the fitted estimator itself).
xgb = XGBClassifier(**tuned_params).fit(x_train_smote, y_train_smote)

# Score on the untouched test split
predicted = xgb.predict(x_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predicted)

print("Accuracy using XGBoost after SMOTE:", accuracy * 100, "%")
print(classification_report(y_true=y_test, y_pred=predicted))

Accuracy using XGBoost after SMOTE: 85.98006644518273 %
              precision    recall  f1-score   support

         0.0       0.90      0.93      0.91     10663
         1.0       0.69      0.62      0.65      2882

    accuracy                           0.86     13545
   macro avg       0.80      0.77      0.78     13545
weighted avg       0.85      0.86      0.86     13545



In [12]:
# Random under-sampling (fixed seed) applied to the training split only;
# x_test / y_test are used as-is for evaluation below.
undersampler = RandomUnderSampler(random_state=42)
x_train_undersampled, y_train_undersampled = undersampler.fit_resample(x_train, y_train)

# Define the XGBoost classifier with the same hyperparameters that the
# grid-search cell of this notebook printed as best.
xgb = XGBClassifier(n_estimators=300, max_depth=7, learning_rate=0.1)

# Fit the model using the undersampled data
xgb.fit(x_train_undersampled, y_train_undersampled)

# Make predictions on the test set
predicted = xgb.predict(x_test)

# Calculate accuracy and the per-class report on the test split
accuracy = accuracy_score(y_test, predicted)
print("Accuracy using XGBoost after undersampling:", accuracy * 100, "%")
print(classification_report(y_test, predicted))

Accuracy using XGBoost after undersampling: 80.5094130675526 %
              precision    recall  f1-score   support

         0.0       0.94      0.80      0.87     10663
         1.0       0.53      0.81      0.64      2882

    accuracy                           0.81     13545
   macro avg       0.73      0.81      0.75     13545
weighted avg       0.85      0.81      0.82     13545

