In [82]:
import pandas as pd
from imblearn.over_sampling import ADASYN
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, roc_auc_score, auc
df=pd.read_csv('creditcard.csv')

In [83]:

#Data Settings
X = df.drop(['Class'], axis=1)
y = df['Class']
rs= 2023
#Standardize
mm = MinMaxScaler()
X['nTime'] = mm.fit_transform(X['Time'].values.reshape(-1, 1))
X = X.drop(['Time'], axis=1)
cols = X.columns.to_list()
cols =[cols[-1]]+cols[:-1]
X = X[cols]
X['nAmount'] = mm.fit_transform(X['Amount'].values.reshape(-1,1))
X = X.drop(['Amount'], axis=1)

X_train, X_left, y_train, y_left = train_test_split(X, y, train_size=0.8, stratify=y, shuffle=True, random_state=rs)
X_valid, X_test, y_valid, y_test = train_test_split(X_left,y_left, test_size=0.5, stratify=y_left, shuffle=True, random_state=rs)


In [84]:
print(len(X_train))

227845


In [85]:

#Default Decision Tree With Raw data
dTree = DecisionTreeClassifier(random_state=rs)

dTree.fit(X_train, y_train)

pred = dTree.predict(X_valid)
print(classification_report(y_valid, pred))

fpr, tpr, threshold = roc_curve(y_valid, pred,pos_label=1)
auc1 = auc(fpr, tpr)

print("AUC:",auc1)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28432
           1       0.77      0.76      0.76        49

    accuracy                           1.00     28481
   macro avg       0.89      0.88      0.88     28481
weighted avg       1.00      1.00      1.00     28481

AUC: 0.8773575764014102


In [86]:
weighted_dTree = DecisionTreeClassifier(class_weight='balanced', random_state=rs)

weighted_dTree.fit(X_train, y_train)
score = weighted_dTree.score(X_valid, y_valid)
score
pred_w = weighted_dTree.predict(X_valid)
print(classification_report(y_valid, pred_w))

fpr, tpr, threshold = roc_curve(y_valid, pred_w,pos_label=1)
auc1 = auc(fpr, tpr)

print("AUC:", auc1)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28432
           1       0.82      0.76      0.79        49

    accuracy                           1.00     28481
   macro avg       0.91      0.88      0.89     28481
weighted avg       1.00      1.00      1.00     28481

AUC: 0.8774103338577974


In [87]:
ada = ADASYN(random_state= rs)
df_res, c_res = ada.fit_resample(X_train, y_train)
dTree = DecisionTreeClassifier(random_state=rs)


dTree.fit(df_res, c_res)

pred = dTree.predict(X_valid)
print(classification_report(y_valid, pred))

fpr, tpr, threshold = roc_curve(y_valid, pred,pos_label=1)
auc1 = auc(fpr, tpr)
print("AUC", auc1)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28432
           1       0.41      0.78      0.54        49

    accuracy                           1.00     28481
   macro avg       0.71      0.89      0.77     28481
weighted avg       1.00      1.00      1.00     28481

AUC 0.8868054678258472


In [88]:
print(len(df_res))

454885


In [89]:
weighted_dTree = DecisionTreeClassifier(class_weight='balanced', random_state=rs)

weighted_dTree.fit(df_res, c_res)
score = weighted_dTree.score(X_valid, y_valid)
score
pred_w = weighted_dTree.predict(X_valid)
print(classification_report(y_valid, pred_w))

fpr, tpr, threshold = roc_curve(y_valid, pred,pos_label=1)
auc1 = auc(fpr, tpr)
print("AUC", auc1)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28432
           1       0.44      0.78      0.56        49

    accuracy                           1.00     28481
   macro avg       0.72      0.89      0.78     28481
weighted avg       1.00      1.00      1.00     28481

AUC 0.8868054678258472
