In [None]:
import pandas as pd
import numpy as np
import urllib.request
import pickle

import sklearn.model_selection
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import recall_score, roc_auc_score, accuracy_score ,confusion_matrix
from imblearn.over_sampling import SMOTE

from tqdm import tqdm

In [None]:
# Fixed seed constant, intended for the stochastic steps of the notebook so
# that a Restart & Run All can reproduce the same numbers.
RANDOM_STATE = 42

In [None]:
# Load the pickled dataset from the project's GitHub repository.
# SECURITY NOTE(review): pickle.load can execute arbitrary code embedded in
# the payload, so this is only safe while the remote file is trusted. Consider
# pinning the URL to a commit hash or using a non-executable data format.
DATA_URL = 'https://github.com/euanbrown247/bank_fraud_project/blob/main/X_y.pkl?raw=true'

# The context manager closes the HTTP response once the payload has been read.
with urllib.request.urlopen(DATA_URL) as response:
    data = pickle.load(response)

# The first two elements are used as the feature matrix and the labels
# by the cells below.
X = data[0]
y = data[1]

In [None]:
# Gradient-boosting classifier used by the evaluation cell below.
# random_state is pinned to the notebook-wide seed so that repeated runs
# build the same ensemble instead of depending on the global RNG state.
model_gb = GradientBoostingClassifier(
    n_estimators=20,
    random_state=RANDOM_STATE)

In [None]:
# Repeat the split -> SMOTE -> fit -> score experiment once per seed so the
# spread of the metrics across different train/test partitions can be
# summarised at the end of the cell.
states = range(0,20)
output = []

# Index -> label lookup used for the summary table. Only the
# gradient-boosting model (index 0) is evaluated in this cell.
model_labels = {0:'gb',1:'rf',2:'dt','n':'naive'}
modeln = 0

# With warm_start left at its default, fit() retrains from scratch on every
# call, so a single estimator instance is reused across rounds.
clf = model_gb

for n,seed in enumerate(states):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=seed)

    # Oversample the training split only; the held-out split is left untouched.
    sm = SMOTE(random_state=seed)
    X_res, y_res = sm.fit_resample(X_train, y_train)

    clf.fit(X_res, y_res)

    pred = clf.predict(X_test)
    # ROC AUC ranks samples by a continuous score; feeding it hard 0/1
    # predictions collapses the curve to a single operating point. Use the
    # predicted probability of the positive class (classes_[1]) instead.
    proba = clf.predict_proba(X_test)[:, 1]

    # Builtin round() works whether the metric returns a NumPy scalar or a
    # plain Python float (which has no .round() method).
    scores = {
        'roundn': n,
        'modeln': modeln,
        'recall': round(recall_score(y_test, pred), 3),
        'AUC': round(roc_auc_score(y_test, proba), 3),
        'acc': round(accuracy_score(y_test, pred), 3),
        'confu': confusion_matrix(y_test, pred)}

    output.append(scores)

# One row per round.
df = pd.DataFrame(output)

# 'modeln' is recorded in every row above, so this lookup no longer raises KeyError.
df['model'] = df['modeln'].map(model_labels)

# Mean and variance of recall / AUC across the rounds, per model.
print(df.groupby('model')[['recall','AUC']].agg(['mean','var']))