In [2]:
# Import necessary libraries
import xgboost as xgb
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
import numpy as np

file_path_train = r"C:\Users\user\Desktop\bitirme_projesi\card_transdata_part1.csv"
file_path_test = r"C:\Users\user\Desktop\bitirme_projesi\card_transdata_part2.csv"

df_train = pd.read_csv(file_path_train)
df_test = pd.read_csv(file_path_test)

In [3]:
# Separate features and target
X_train = df_train.drop("fraud", axis=1)
y_train = df_train["fraud"]
X_test = df_test.drop("fraud", axis=1)
y_test = df_test["fraud"]

# Standardize feature values
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Class distribution before SMOTE:")
print(pd.Series(y_train).value_counts())

Class distribution before SMOTE:
fraud
0.0    456298
1.0     43702
Name: count, dtype: int64


In [4]:
# Apply SMOTE to balance the training data
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_scaled, y_train)

print("\nClass distribution after SMOTE:")
print(pd.Series(y_train_resampled).value_counts())


Class distribution after SMOTE:
fraud
0.0    456298
1.0    456298
Name: count, dtype: int64


In [5]:
# Initialize XGBoost model
xgb_model = xgb.XGBClassifier(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    use_label_encoder=False,
    eval_metric="logloss"
)

xgb_model.fit(X_train_resampled, y_train_resampled)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [6]:
y_pred = xgb_model.predict(X_test_scaled)
y_prob = xgb_model.predict_proba(X_test_scaled)[:, 1]

# Evaluation
print("--- Model Evaluation ---")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))
print("\nROC AUC:", roc_auc_score(y_test, y_prob))

--- Model Evaluation ---
Confusion Matrix:
 [[455602    697]
 [   149  43552]]

Classification Report:
               precision    recall  f1-score   support

         0.0     0.9997    0.9985    0.9991    456299
         1.0     0.9842    0.9966    0.9904     43701

    accuracy                         0.9983    500000
   macro avg     0.9920    0.9975    0.9947    500000
weighted avg     0.9983    0.9983    0.9983    500000


ROC AUC: 0.9999851530706307
