In [1]:
!pip install xgboost
!pip install shap



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, RocCurveDisplay
from imblearn.over_sampling import SMOTE
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the raw dataset from the working directory.
df = pd.read_csv(filepath_or_buffer="Fraud.csv")

In [4]:
# Drop the `nameOrig` and `nameDest` columns.
# Re-assigning (rather than `inplace=True`) together with `errors='ignore'`
# makes this cell safe to re-run: a second execution no longer raises KeyError
# because the columns are already gone.
df=df.drop(columns=['nameOrig','nameDest'],errors='ignore')


In [5]:
# Replace the `type` column with the integer codes produced by a LabelEncoder.
type_encoder = LabelEncoder()
df['type'] = type_encoder.fit_transform(df['type'])

In [6]:
# Keep only the rows where at least one of the four balance columns is non-zero
# (i.e. discard rows whose four balances are all exactly 0).
balance_cols = ['oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']
has_nonzero_balance = df[balance_cols].ne(0).any(axis=1)
df = df[has_nonzero_balance]

In [7]:
# Target vector: the `isFraud` column.
y = df["isFraud"]
# Feature matrix: every column except the target and `isFlaggedFraud`.
x = df.drop(columns=["isFraud", "isFlaggedFraud"])

In [8]:
X_scaled=pd.DataFrame(StandardScaler().fit_transform(x),columns=x.columns)

In [9]:
smote=SMOTE(random_state=42)
X_res,Y_res=smote.fit_resample(X_scaled,y)


In [10]:
X_train,X_test,Y_train,Y_test=train_test_split(X_res,Y_res,test_size=0.2,stratify=Y_res,random_state=42)

In [None]:
# Base estimator for the search. `use_label_encoder` is intentionally not
# passed: it is deprecated and newer XGBoost releases no longer recognise it.
xgb=XGBClassifier(random_state=42,eval_metric='auc')

# Candidate values sampled by the randomized search.
param_grid={'n_estimators':[100,200,300],
            'learning_rate':[0.01,0.05,0.1],
            'max_depth':[4,5,6,7],
            'subsample':[0.6,0.8,1],
            'colsample_bytree':[0.6,0.8,1]}

# 10 sampled configurations x 3-fold CV, ranked by ROC AUC, using all cores.
# NOTE(review): X_train has already been oversampled with SMOTE earlier in the
# notebook, so the CV folds share synthetic samples and the CV scores are
# likely optimistic; rely on the held-out evaluation instead.
search=RandomizedSearchCV(estimator=xgb,param_distributions=param_grid,n_iter=10,scoring='roc_auc',cv=3,verbose=1,random_state=42,n_jobs=-1)
search.fit(X_train,Y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits


In [None]:
# Model refitted with the best configuration found by the search.
best_xgb = search.best_estimator_
# Hard class predictions for the held-out features.
y_pred = best_xgb.predict(X_test)

In [None]:
# Per-class probabilities; the second column (index 1) is kept as the score.
class_probabilities = best_xgb.predict_proba(X_test)
y_prob = class_probabilities[:, 1]

In [None]:
# Confusion matrix of true vs. predicted labels.
conf_matrix = confusion_matrix(Y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Per-class precision / recall / F1 summary.
class_report = classification_report(Y_test, y_pred)
print("\n classification Report:\n", class_report)

# ROC AUC computed from the predicted probabilities rather than hard labels.
auc_score = roc_auc_score(Y_test, y_prob)
print("\nRocAUC Score:", auc_score)
      

In [None]:
# Draw the ROC curve of the tuned model on the held-out data.
RocCurveDisplay.from_estimator(estimator=best_xgb, X=X_test, y=Y_test)
plt.show()

In [None]:
# Horizontal bar chart: one bar per feature column, length = model importance.
plt.figure(figsize=(10, 5))
sns.barplot(
    y=x.columns,
    x=best_xgb.feature_importances_,
)
plt.title("feature Importances from Xgboost")
plt.show()