In [19]:
from sklearn.ensemble import RandomForestClassifier,StackingClassifier
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score
import pandas as pd

In [5]:
# Read the CSV and separate the "Class" target column from the predictors.
df = pd.read_csv("data/creditcard.csv")
y = df["Class"]
X = df.drop(columns="Class")

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [6]:
# Base learners, both seeded with the same random_state for reproducibility.
# Random forest: 100 trees, each limited to a depth of 7.
rfc = RandomForestClassifier(
    n_estimators=100,
    max_depth=7,
    random_state=42,
)
# Gradient-boosted trees with library defaults, evaluated on log loss.
xgb = XGBClassifier(
    eval_metric="logloss",
    random_state=42,
)

In [8]:
# Train the random forest on the training partition. fit() is left as the
# cell's final expression so the fitted estimator is displayed.
rfc.fit(X=X_train, y=y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,7
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [9]:
# Train the XGBoost classifier on the training partition. fit() is left as
# the cell's final expression so the fitted estimator is displayed.
xgb.fit(X=X_train, y=y_train)

0,1,2
,objective,'binary:logistic'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [12]:
# Score the held-out rows with each base model, keeping only column 1 of
# predict_proba (the probability of the second class in `classes_`;
# presumably the label 1 — confirm against the data).
rfc_pred, xgb_pred = (
    fitted.predict_proba(X_test)[:, 1] for fitted in (rfc, xgb)
)

In [14]:
# ROC AUC of each base model on the held-out split.
rfc_auc = roc_auc_score(y_test, rfc_pred)
xgb_auc = roc_auc_score(y_test, xgb_pred)

print(f"ROC-AUC-Score(rfc): {rfc_auc}")
print(f"ROC-AUC-Score(xgb): {xgb_auc}")

ROC-AUC-Score(rfc): 0.9616258591928613
ROC-AUC-Score(xgb): 0.9389517272862999


In [15]:
# (name, estimator) pairs in the form StackingClassifier expects for its
# `estimators` argument.
estimators = list(zip(("rfc", "xgb"), (rfc, xgb)))

In [21]:
# Stack the two base learners under a logistic-regression meta-learner.
# With cv=5 the meta-learner is trained on 5-fold cross-validated
# predictions of the base estimators.
meta_learner = LogisticRegression()
stacked_model = StackingClassifier(
    estimators=estimators,
    final_estimator=meta_learner,
    cv=5,
)

In [None]:
# Fit the stacked ensemble, then score the held-out rows. fit() returns the
# estimator itself, so the prediction call is chained onto it. Column 1 of
# predict_proba is kept, matching how the base models were scored.
sta_pred = (
    stacked_model
    .fit(X_train, y_train)
    .predict_proba(X_test)[:, 1]
)

In [None]:
# Held-out metrics for the stacked model.
# ROC AUC: area under the ROC curve.
print(f"roc_auc: {roc_auc_score(y_test, sta_pred)}")
# Average precision summarizes the precision-recall curve. It gets its own
# label so the two printed numbers can be told apart.
print(f"average_precision: {average_precision_score(y_test, sta_pred)}")