## 1. Data Load

In [27]:
import pandas as pd
df = pd.read_csv('/content/drive/MyDrive/creditcard.csv')
print(len(df[df.Class == 1]))
print(len(df[df.Class == 0]))

492
284315


## 2. Modeling

### 2.0 Library import & data split

In [25]:
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier

from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import *

In [17]:
columns = list(df.columns)
label = columns.pop(-1)
features = columns

X = df[features]
y = df[label]

In [18]:
undersample = RandomUnderSampler(sampling_strategy=0.1)
X_under, y_under = undersample.fit_resample(X, y)
X_train, X_test, y_train, y_test = tts(X_under, y_under, random_state=5, test_size=0.2)

### 2.1 LGBM

In [21]:
lgbm = LGBMClassifier()
lgbm.fit(X_train, y_train)
y_pred = lgbm.predict(X_test)

print('lgbm score')
print('AUC Score:', roc_auc_score(y_test, y_pred))
print('F1 Score:', f1_score(y_test, y_pred))
print('Precision Score:', precision_score(y_test, y_pred))
print('Recall Score:', recall_score(y_test, y_pred))

lgbm score
AUC Score: 0.946302949465396
F1 Score: 0.9385474860335196
Precision Score: 0.9882352941176471
Recall Score: 0.8936170212765957


### 2.2 XGBoost

In [23]:
xgb = XGBClassifier()
xgb.fit(X_train, y_train)
y_pred = xgb.predict(X_test)

print('xgb score')
print('AUC Score:', roc_auc_score(y_test, y_pred))
print('F1 Score:', f1_score(y_test, y_pred))
print('Precision Score:', precision_score(y_test, y_pred))
print('Recall Score:', recall_score(y_test, y_pred))

xgb score
AUC Score: 0.9404782393563237
F1 Score: 0.9273743016759776
Precision Score: 0.9764705882352941
Recall Score: 0.8829787234042553


### 2.3 Decision Tree

In [26]:
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)

print('xgb score')
print('AUC Score:', roc_auc_score(y_test, y_pred))
print('F1 Score:', f1_score(y_test, y_pred))
print('Precision Score:', precision_score(y_test, y_pred))
print('Recall Score:', recall_score(y_test, y_pred))

xgb score
AUC Score: 0.9372028483531614
F1 Score: 0.852791878172589
Precision Score: 0.8155339805825242
Recall Score: 0.8936170212765957


## LGBM + XG

In [45]:
from sklearn.ensemble import VotingClassifier

models = [('lgbm', lgbm),
          ('xgb', xgb)]

soft_vote  = VotingClassifier(models, voting='hard')
soft_vote.fit(X_train, y_train)

y_pred = soft_vote.predict(X_test)

print('ensemble model score')
print('AUC Score:', roc_auc_score(y_test, y_pred))
print('F1 Score:', f1_score(y_test, y_pred))
print('Precision Score:', precision_score(y_test, y_pred))
print('Recall Score:', recall_score(y_test, y_pred))

ensemble model score
AUC Score: 0.9409838005292257
F1 Score: 0.9325842696629214
Precision Score: 0.9880952380952381
Recall Score: 0.8829787234042553
