In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_curve
from yellowbrick.classifier import discrimination_threshold
from yellowbrick.classifier import confusion_matrix


In [34]:
data = pd.read_csv('../10-Data/credit-card-fraud-data.zip')

In [35]:
data = data.set_index('Time')

In [36]:
data['logAmount'] = np.log1p(data.Amount)
data = data.drop(columns={'Amount'})

In [37]:
Xtrn, Xtst, ytrn, ytst = train_test_split(data.loc[:,data.columns != 'Class'], 
                                          data.Class, 
                                          test_size=0.50, 
                                          random_state=42)

In [41]:
def performance_with_optimal_threshold(model, X = Xtrn, y = ytrn):
    
    from scipy.stats import hmean
    
    prob = model.predict_proba(X)[:,1]
    precision, recall, thresh = precision_recall_curve(y, prob)
    
    F = hmean((precision,recall))
    max_F = max(F)
    threshold = thresh[np.where(F == max_F)][0]
    
    return((threshold, max_F))
    

In [42]:
lerners = {'logistic': LogisticRegression(),
           'rf': RandomForestClassifier()}

results = {}

In [43]:
for i, (n, m) in enumerate(lerners.items()):
    pipe = Pipeline([('scaler', StandardScaler()),
                 ('model', m)])
    pipe.fit(Xtrn, ytrn)
    threshold, F = performance_with_optimal_threshold(m)
    results[n] = {'threshold': threshold,
                  'F': F}
    print(n)
    

logistic
rf


In [44]:
print(results)

{'logistic': {'threshold': 0.25372937576272014, 'F': 0.775347912524851}, 'rf': {'threshold': 0.5, 'F': 0.8369098712446352}}


In [None]:
precision_recall_curve?