In [51]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler

from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression


from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score,accuracy_score




In [2]:

# Load the dataset; the first CSV column becomes the row index.
df = pd.read_csv("fraud_reduced.csv", index_col=0)

# "Class" is the label column; every remaining column is a feature.
y = df["Class"]
X = df.drop(columns=["Class"])

# Stratified split (33% held out) with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    stratify=y,
    random_state=42,
)

In [33]:
from pathlib import Path

import joblib

# Build model paths with pathlib instead of "Models\X.pkl" literals: the
# backslash form relies on invalid escape sequences (\S, \X, \R, \A), which
# emit a SyntaxWarning on recent Python versions, and it is only treated as a
# directory separator on Windows.
MODEL_DIR = Path("Models")

# NOTE(security): joblib.load unpickles the file and can execute arbitrary
# code, so these artefacts must only come from a trusted source.
svm_clf = joblib.load(MODEL_DIR / "SVMModel.pkl")
xgb_clf = joblib.load(MODEL_DIR / "XGBModel.pkl")
rnd_clf = joblib.load(MODEL_DIR / "RandomForestModel.pkl")
rus_clf = joblib.load(MODEL_DIR / "RUSboost.pkl")
ada_clf = joblib.load(MODEL_DIR / "AdaBoost.pkl")

# Wrap the SVM in a pipeline so RobustScaler is applied to the features
# before they reach it; the other models receive the features unscaled.
svm_scaled = Pipeline([
    ('scaler', RobustScaler()),
    ('svm', svm_clf)
])

In [34]:
# (name, estimator) pairs for the base learners; this same list is handed to
# the stacking and voting ensembles.
estimator_list = list(zip(
    ('xgb', 'rnd', 'svm', 'rus', 'ada'),
    (xgb_clf, rnd_clf, svm_scaled, rus_clf, ada_clf),
))

In [35]:
# Stacking ensemble: a logistic regression acts as the final estimator that
# combines the outputs of the base learners.
stack_model = StackingClassifier(
    final_estimator=LogisticRegression(),
    estimators=estimator_list,
)

In [36]:
# Train the stacking ensemble on the training split.
stack_model.fit(X=X_train, y=y_train)

In [39]:
# Hard-voting ensemble: the prediction is the majority vote over the class
# labels predicted by the base learners.
vote_model_hard = VotingClassifier(
    voting='hard',
    estimators=estimator_list,
)

In [40]:
# Train the hard-voting ensemble on the training split.
vote_model_hard.fit(X=X_train, y=y_train)

In [41]:
# Soft-voting ensemble: the prediction is based on the combined class
# probabilities predicted by the base learners.
vote_model_soft = VotingClassifier(
    voting='soft',
    estimators=estimator_list,
)

In [42]:
# Train the soft-voting ensemble on the training split.
vote_model_soft.fit(X=X_train, y=y_train)

In [43]:
# Hard-voting predictions for the held-out test split.
y_pred_vh = vote_model_hard.predict(X=X_test)

In [52]:

# Confusion matrix of the hard-voting predictions against the test labels.
cm = confusion_matrix(y_test, y_pred_vh)
print(cm)

# Score the same predictions with each metric, evaluated in the order
# recall, precision, F1, accuracy.
VC_hard_Recall, VC_hard_Precision, VC_hard_f1, VC_hard_accuracy = (
    metric(y_test, y_pred_vh)
    for metric in (recall_score, precision_score, f1_score, accuracy_score)
)


[[93814    11]
 [   42   120]]


In [47]:
# Soft-voting predictions for the held-out test split.
y_pred_vs = vote_model_soft.predict(X=X_test)

In [53]:

# Confusion matrix of the soft-voting predictions against the test labels.
cm = confusion_matrix(y_test, y_pred_vs)
print(cm)

# Score the same predictions with each metric, evaluated in the order
# recall, precision, F1, accuracy.
VC_soft_Recall, VC_soft_Precision, VC_soft_f1, VC_soft_accuracy = (
    metric(y_test, y_pred_vs)
    for metric in (recall_score, precision_score, f1_score, accuracy_score)
)



[[93817     8]
 [   40   122]]


In [49]:
# Stacking-ensemble predictions for the held-out test split.
y_pred_st = stack_model.predict(X=X_test)

In [54]:

# Confusion matrix of the stacking predictions against the test labels.
cm = confusion_matrix(y_test, y_pred_st)
print(cm)

# Score the same predictions with each metric, evaluated in the order
# recall, precision, F1, accuracy.
ST_Recall, ST_Precision, ST_f1, ST_accuracy = (
    metric(y_test, y_pred_st)
    for metric in (recall_score, precision_score, f1_score, accuracy_score)
)


[[93816     9]
 [   41   121]]


In [55]:
# One record per model, each a single (recall, precision, F1, accuracy)
# tuple wrapped in a list so it can serve as the rows of a DataFrame.
stats_hard = [
    (VC_hard_Recall, VC_hard_Precision, VC_hard_f1, VC_hard_accuracy),
]
stats_soft = [
    (VC_soft_Recall, VC_soft_Precision, VC_soft_f1, VC_soft_accuracy),
]
stats_stack = [
    (ST_Recall, ST_Precision, ST_f1, ST_accuracy),
]


In [56]:
# Single-row score table for the soft-voting ensemble.
VC_soft_score = pd.DataFrame(
    stats_soft,
    columns=['Recall', 'Precision', 'F1 Score', 'Accuracy'],
)

In [57]:
# Single-row score table for the hard-voting ensemble.
VC_hard_score = pd.DataFrame(
    stats_hard,
    columns=['Recall', 'Precision', 'F1 Score', 'Accuracy'],
)

In [58]:
# Single-row score table for the stacking ensemble.
stack_score = pd.DataFrame(
    stats_stack,
    columns=['Recall', 'Precision', 'F1 Score', 'Accuracy'],
)

In [59]:
# Stack the three single-row tables into one comparison frame with a fresh
# 0..2 index. Row order: 0 = hard voting, 1 = soft voting, 2 = stacking.
predictions = pd.concat(
    [VC_hard_score, VC_soft_score, stack_score],
    sort=False,
    ignore_index=True,
)

In [60]:
# Display the comparison table; rows follow the concat order
# (hard voting, soft voting, stacking).
predictions

Unnamed: 0,Recall,Precision,F1 Score,Accuracy
0,0.740741,0.916031,0.819113,0.999436
1,0.753086,0.938462,0.835616,0.999489
2,0.746914,0.930769,0.828767,0.999468
