In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the transaction table from CSV (the path is relative to the notebook's working directory)
trans = pd.read_csv('./simulated-data/fe_trans.csv')

In [3]:
# Parse the transaction timestamp column into pandas datetime values
trans['TX_DATETIME'] = pd.to_datetime(trans['TX_DATETIME'])

In [4]:
# Print a summary of the loaded frame (index range, column names, dtypes)
trans.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3579099 entries, 0 to 3579098
Data columns (total 32 columns):
 #   Column                                            Dtype         
---  ------                                            -----         
 0   TRANSACTION_ID                                    int64         
 1   TX_DATETIME                                       datetime64[ns]
 2   CUSTOMER_ID                                       int64         
 3   TERMINAL_ID                                       int64         
 4   TX_AMOUNT                                         float64       
 5   TX_TIME_SECONDS                                   int64         
 6   TX_TIME_DAYS                                      int64         
 7   TX_FRAUD                                          int64         
 8   TX_FRAUD_SCENARIO                                 int64         
 9   TX_LAST_DATETIME                                  object        
 10  TX_LAST_SECONDS                           

**Supporting functions and constants**

In [5]:
# Feature configuration for the baseline model: a deliberately minimal feature set.
# Entries that are commented out are excluded from the model input.
#
# Numerical features: these columns are selected from the transaction frame and
# standardized by train_test_split_and_scale().
# NOTE(review): the 'CUSOMTER' spelling in two names below is kept as-is because these
# strings are used to select DataFrame columns — presumably they match the CSV headers;
# verify before renaming.
FEATURE_NUMERICAL = [
    'TX_AMOUNT',
#    'TX_TIME_SECONDS',
#    'TX_TIME_DAYS',
#    'TX_LAST_SECONDS',
    'TX_LAST_HOURS',
    'TX_LAST_DAYS',
    'CUSTOMER_ID_AVG_AMOUNT_1DAY_WINDOW',    
    'CUSTOMER_ID_AVG_AMOUNT_7DAY_WINDOW',
    'CUSTOMER_ID_AVG_AMOUNT_30DAY_WINDOW',
    'CUSTOMER_ID_AVG_AMOUNT_2REC',
    'CUSTOMER_ID_AVG_AMOUNT_10REC',
    'TERMINAL_ID_RISK_2DAY_WINDOW',
    'TERMINAL_ID_RISK_7DAY_WINDOW',
    'SUM_TX_AMOUNT_CUSOMTER_ID_SAME_TERMINAL_SAME_DAY',
    'nb_TX_CUSOMTER_ID_SAME_TERMINAL_SAME_DAY',
#    'CUSTOMER_TERMINAL_DISTANCE',
    'AMOUNT_Z_SCORE',
    'CUSTOMER_TERMINAL_DISTANCE_Z_SCORE'
]

# Categorical features: these columns are passed through train_test_split_and_scale()
# without scaling and are appended after the scaled numerical columns.
FEATURE_CATEGORICAL = [
     'TX_TIME_HOUR_BIN_0',
     'TX_TIME_HOUR_BIN_1',
     'TX_TIME_HOUR_BIN_2',
     'TX_TIME_HOUR_BIN_3',
     'TX_TIME_HOUR_BIN_4',
     'TX_TIME_HOUR_BIN_5',
     'ONE_DOLLAR'
]

# Full model input, in column order: numerical first, then categorical
FEATURE_LIST = FEATURE_NUMERICAL + FEATURE_CATEGORICAL

# Columns dropped from the anomaly detector's train/test frames before fitting
# (the author suspects the detector may be too sensitive to them).
# The list is currently empty because every candidate is commented out, so nothing is dropped.
ANOMALY_DETECTOR_FEATURE_EXCLUSION_LIST = [
#    'TX_LAST_HOURS',
#    'CUSTOMER_ID_AVG_AMOUNT_1DAY_WINDOW',    
#    'CUSTOMER_ID_AVG_AMOUNT_7DAY_WINDOW',
#    'CUSTOMER_ID_AVG_AMOUNT_30DAY_WINDOW',
#    'CUSTOMER_ID_AVG_AMOUNT_2REC',
#    'CUSTOMER_ID_AVG_AMOUNT_10REC',
#    'TERMINAL_ID_RISK_2DAY_WINDOW',
#    'TERMINAL_ID_RISK_7DAY_WINDOW'
]

# Name of the label column used as the training / evaluation target
TARGET = 'TX_FRAUD'

# Select the rows of a transaction frame whose TX_DATETIME lies inside a date window.
# Both bounds are inclusive: start_date <= TX_DATETIME <= end_date.
def get_dataset_with_date(df,start_date,end_date): 
    in_window = df['TX_DATETIME'].between(start_date, end_date)
    return df.loc[in_window]

from sklearn.preprocessing import StandardScaler

# Split the transaction frame into train/test sets by date and standardize the numerical features
def train_test_split_and_scale(trans_df, train_date_range, test_date_range, scaler = None, genuine_only = False):
    """Build scaled train/test feature frames and their labels from a transaction frame.

    Parameters
    ----------
    trans_df : DataFrame holding TX_DATETIME, the feature columns, TARGET,
        TRANSACTION_ID and TX_FRAUD_SCENARIO.
    train_date_range, test_date_range : (start, end) pairs unpacked into
        get_dataset_with_date().
    scaler : an already-fitted scaler to reuse; when None, a new StandardScaler
        is fitted on the training rows only.
    genuine_only : when true, fraudulent rows (TARGET != 0) are removed from the
        training set before fitting/scaling. The test set is never filtered.

    Returns
    -------
    (df_train_new, df_test_new, y_train, y_test_df, ss)
        df_train_new / df_test_new : scaled numerical columns followed by the
            unscaled categorical columns, with a fresh 0..n-1 index.
        y_train : TARGET column of the training rows (keeps the original index).
        y_test_df : independent copy of TRANSACTION_ID, TX_FRAUD_SCENARIO and
            TARGET for the test rows (keeps the original index).
        ss : the scaler that was used.
    """

    def _scale_and_join(subset_df, fitted_scaler):
        # Scale the numerical columns and append the categorical ones positionally.
        numerical = subset_df[FEATURE_NUMERICAL]
        scaled = pd.DataFrame(fitted_scaler.transform(numerical), columns = numerical.columns)
        # transform() output has a fresh 0..n-1 index, so the categorical part is
        # reset as well to keep the rows aligned in the concat.
        categorical = subset_df[FEATURE_CATEGORICAL].reset_index(drop = True)
        return pd.concat([scaled, categorical], axis=1)

    ss = scaler

    # Train data
    train_df = get_dataset_with_date(trans_df, *train_date_range)

    if genuine_only:
        train_df = train_df[train_df[TARGET] == 0]

    if ss is None:
        # Fit on training rows only so that no test-set statistics leak into the scaler
        ss = StandardScaler()
        ss.fit(train_df[FEATURE_NUMERICAL])

    df_train_new = _scale_and_join(train_df, ss)
    y_train = train_df[TARGET]

    # Test data: transformed with the same fitted scaler as the training data
    test_df = get_dataset_with_date(trans_df, *test_date_range)
    df_test_new = _scale_and_join(test_df, ss)

    # Callers add prediction columns to this frame; return an explicit copy so those
    # assignments do not target a slice of trans_df (SettingWithCopyWarning).
    y_test_df = test_df[['TRANSACTION_ID', 'TX_FRAUD_SCENARIO', TARGET]].copy()

    return df_train_new, df_test_new, y_train, y_test_df, ss

**Functions for metrics**

In [6]:
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# Turn a vector of fraud probabilities into 0/1 class labels using a decision threshold
def get_class_from_fraud_probability(fraud_probabilities, threshold=0.5):
    """Return a list with 0 where the probability is below `threshold`, otherwise 1."""
    labels = []
    for probability in fraud_probabilities:
        if probability < threshold:
            labels.append(0)
        else:
            labels.append(1)
    return labels


def threshold_based_metrics(fraud_probabilities, true_label, thresholds_list):
    """Compute confusion-matrix based metrics for every distinct threshold.

    Parameters
    ----------
    fraud_probabilities : iterable of predicted fraud probabilities.
    true_label : true 0/1 labels, positionally aligned with `fraud_probabilities`.
    thresholds_list : candidate thresholds; duplicates are removed and the
        remaining values are evaluated from highest to lowest.

    Returns
    -------
    DataFrame with one row per threshold and the columns Threshold, Accuracy,
    MME, TPR, TNR, FPR, FNR, BER, G-mean, Precision, NPV, FDR, FOR, F1 Score.
    Precision/FDR, NPV/FOR and F1 Score are reported as 0 when their
    denominator is zero.
    """
    results = []

    unique_thresholds = sorted(set(thresholds_list), reverse=True)

    for threshold in unique_thresholds:

        predicted_classes = get_class_from_fraud_probability(fraud_probabilities, threshold=threshold)

        # labels=[0, 1] forces a 2x2 matrix even when only one class occurs in both the
        # truth and the predictions; without it ravel() yields a single value and the
        # 4-way unpacking below raises ValueError.
        (TN, FP, FN, TP) = metrics.confusion_matrix(true_label, predicted_classes, labels=[0, 1]).ravel()

        # Mean misclassification error and its complement
        MME = (FP+FN)/(TN+FP+FN+TP)
        accuracy = 1 - MME

        TPR = TP/(TP+FN)
        TNR = TN/(TN+FP)

        FPR = FP/(TN+FP)
        FNR = FN/(TP+FN)

        # Balanced error rate and geometric mean of the per-class recalls
        BER = 1/2*(FPR+FNR)
        Gmean = np.sqrt(TPR*TNR)

        precision = 0
        FDR = 0
        F1_score = 0

        # Only defined when at least one sample was predicted positive
        if TP+FP>0:
            precision = TP/(TP+FP)
            FDR = FP/(TP+FP)

        NPV = 0
        FOR = 0

        # Only defined when at least one sample was predicted negative
        if TN+FN>0:
            NPV = TN/(TN+FN)
            FOR = FN/(TN+FN)

        if precision+TPR>0:
            F1_score = 2*(precision*TPR)/(precision+TPR)

        results.append([threshold, accuracy, MME, TPR, TNR, FPR, FNR, BER, Gmean, precision, NPV, FDR, FOR, F1_score])

    results_df = pd.DataFrame(results,columns=['Threshold', 'Accuracy', 'MME', 'TPR', 'TNR', 'FPR', 'FNR', 'BER', 'G-mean', 'Precision', 'NPV', 'FDR', 'FOR', 'F1 Score'])

    return results_df


def print_classification_report(y_test, fraud_probabilities, threshold):
    """Print sklearn's classification report and the confusion matrix at `threshold`.

    A sample is predicted as fraud (1) when its probability is >= `threshold`.
    The confusion matrix is printed as two rows: [TP, FP] and [FN, TN].
    """
    y_predict = [ 1 if p >= threshold else 0 for p in fraud_probabilities ]

    print(classification_report(y_test, y_predict))

    # labels=[0, 1] keeps the matrix 2x2 when only one class is present, so the
    # 4-way unpacking cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_test, y_predict, labels=[0, 1]).ravel()
    print([tp,fp])
    print([fn,tn])


def calculate_precision_recall(y_test, fraud_probabilities, threshold = 0.5):
    """Print the confusion matrix at `threshold` and return (precision, recall).

    A sample is predicted as fraud (1) when its probability is >= `threshold`.
    The confusion matrix is printed as two rows: [TP, FP] and [FN, TN].
    Precision is reported as 0.0 when nothing is predicted positive, and recall
    as 0.0 when there are no true positives to find, instead of dividing by zero.
    """
    y_predict = [ 1 if p >= threshold else 0 for p in fraud_probabilities ]

    # labels=[0, 1] keeps the matrix 2x2 when only one class is present, so the
    # 4-way unpacking cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_test, y_predict, labels=[0, 1]).ravel()
    print([tp,fp])
    print([fn,tn])

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    return precision, recall
    

def plot_roc_curve(y_test, y_predict_probas):
    """Plot the ROC curve of a classifier together with the random-guess diagonal.

    Parameters
    ----------
    y_test : true binary labels.
    y_predict_probas : predicted scores/probabilities for the positive class.

    The area under the curve is shown in the legend. The figure is displayed
    with plt.show(); nothing is returned.
    """

    def get_template_roc_curve(ax, title,fs,random=True):
        # Title, axis limits/labels and (optionally) the random-classifier diagonal
        ax.set_title(title, fontsize=fs)
        ax.set_xlim([-0.01, 1.01])
        ax.set_ylim([-0.01, 1.01])

        ax.set_xlabel('False Positive Rate', fontsize=fs)
        ax.set_ylabel('True Positive Rate', fontsize=fs)

        if random:
            ax.plot([0, 1], [0, 1],'r--',label="AUC ROC Random = 0.5")

    # The thresholds returned by roc_curve are not needed for the plot
    FPR_list, TPR_list, _ = metrics.roc_curve(y_test, y_predict_probas, drop_intermediate=False)

    ROC_AUC = metrics.auc(FPR_list, TPR_list)

    fig, ax = plt.subplots(figsize=(10,5))
    get_template_roc_curve(ax, "Receiver Operating Characteristic (ROC) Curve",fs=15)
    # The colour is given once: passing both the 'b' format string and color='blue'
    # makes matplotlib warn about a redundant colour definition.
    ax.plot(FPR_list, TPR_list, color='blue', label = 'AUC ROC Classifier = {0:0.3f}'.format(ROC_AUC))
    ax.legend(loc = 'lower right')
    plt.show()
    
    
def plot_precisionrecall_curve(y_test, y_predict_probas, recalls, precisions):
    """Plot a precision-recall step curve with the random-classifier baseline.

    Parameters
    ----------
    y_test : true binary labels; their positive rate is drawn as the baseline.
    y_predict_probas : predicted fraud probabilities. Not used for the plot
        (the curve is drawn from `recalls`/`precisions`); kept so the signature
        stays unchanged for existing callers.
    recalls, precisions : equally long, index-addressable sequences describing
        the curve. NOTE(review): presumably ordered by increasing recall, since
        compute_AP skips every step where recall decreases — confirm with callers.

    The figure is displayed with plt.show(); nothing is returned.
    """

    def compute_AP(precision, recall):
        # Sum of (recall step) * (precision at the step), ignoring steps where recall decreases
        AP = 0

        n_thresholds = len(precision)

        for i in range(1, n_thresholds):

            if recall[i]-recall[i-1]>=0:

                AP = AP+(recall[i]-recall[i-1])*precision[i]

        return AP

    def get_template_pr_curve(ax, title,fs, baseline=0.5):
        # Title, axis limits/labels and the horizontal random-classifier baseline
        ax.set_title(title, fontsize=fs)
        ax.set_xlim([-0.01, 1.01])
        ax.set_ylim([-0.01, 1.01])

        ax.set_xlabel('Recall (True Positive Rate)', fontsize=fs)
        ax.set_ylabel('Precision', fontsize=fs)

        ax.plot([0, 1], [baseline, baseline],'r--',label='AP Random = {0:0.3f}'.format(baseline))

    # Use the labels passed by the caller; the previous version ignored them and
    # read a notebook-global `prediction_df` instead (hidden state).
    true_labels = y_test

    fig, ax = plt.subplots(figsize=(5,5))
    get_template_pr_curve(ax, "Precision Recall (PR) Curve",fs=15,baseline=sum(true_labels)/len(true_labels))
    AP = compute_AP(precisions, recalls)
    # The colour is given once: passing both the 'b' format string and color='blue'
    # makes matplotlib warn about a redundant colour definition.
    ax.step(recalls, precisions, color='blue', label = 'AP Classifier = {0:0.3f}'.format(AP))
    ax.legend(loc = 'lower right')
    plt.show()

**Create model for training**

**(Replace this section with your classifier)**

In [7]:
from xgboost import XGBClassifier

def create_classifier(hyper_params=None):
    """Create an unfitted XGBClassifier.

    `hyper_params` is a dict of keyword arguments for XGBClassifier. It may be
    omitted or None, in which case the library defaults are used; callers in this
    notebook invoke the factory without arguments when no parameters are configured.
    """
    logmodel = XGBClassifier(**(hyper_params or {}))
    return logmodel

from sklearn.svm import OneClassSVM

def create_anomaly_detector(hyper_params=None):
    """Create an unfitted OneClassSVM anomaly detector.

    `hyper_params` is a dict of keyword arguments for OneClassSVM. It may be
    omitted or None, in which case the library defaults are used; callers in this
    notebook invoke the factory without arguments when no parameters are configured.
    """
    model = OneClassSVM(**(hyper_params or {}))
    return model

**Train model**

In [8]:
# One dict per training round; the execution loop below trains and evaluates both
# models once for every entry.
#   train_date_range / test_date_range : (start, end) pairs, unpacked into
#       get_dataset_with_date() to select the rows of each split.
#   classifier_hyper_params            : keyword arguments forwarded to XGBClassifier.
#   anomaly_detector_hyper_params      : keyword arguments forwarded to OneClassSVM.
CROSS_VALIDATION_INPUTS = [
    {
        'train_date_range' : ('2018-05-01 00:00:00', '2018-06-30 23:59:59'),   # mandatory
        'test_date_range'  : ('2018-07-01 00:00:00', '2018-07-31 23:59:59'),   # mandatory
        'classifier_hyper_params'     : {
                                 'max_depth'      : 2,
                                 'n_estimators'   : 100,
                                 'learning_rate'  : 0.1,
                                 'min_child_weight'   : 5,
                                 'max_delta_step'       : 9,
                                 'n_jobs'         : -1,
                                 'random_state'   : 789
                             },
        'anomaly_detector_hyper_params'     : {
                                 'kernel'         : 'rbf',
                                 'nu'             : 0.02,
                                 # 'gamma' is left unset (entry disabled below), so the OneClassSVM default applies
#                                 'gamma'          : 0.007
                             }
    }
]



def arb(ad_predict, cl_predict_proba, cl_threshold, arbitrate_threshold, default_model):
    """Combine the anomaly detector's verdict with the classifier's probability for one sample.

    Parameters
    ----------
    ad_predict : anomaly detector verdict, 0 (genuine) or 1 (fraud).
    cl_predict_proba : classifier fraud probability.
    cl_threshold : classifier decision threshold (fraud when proba >= threshold).
    arbitrate_threshold : fraction by which the classifier threshold is relaxed
        towards the anomaly detector's verdict when the two models disagree.
    default_model : 'cl' or 'ad' — whose verdict wins when the disagreement
        cannot be settled.

    Returns
    -------
    (prediction, reason) where reason is
        'unanimous'  : both models agree at the nominal threshold,
        'arbitrated' : the classifier agrees with the anomaly detector once its
                       threshold is relaxed,
        'unsettled'  : still in disagreement; `default_model` decides.

    Raises
    ------
    ValueError : the case is unsettled and `default_model` is neither 'cl' nor 'ad'
        (previously the function silently returned None in that situation).
    """
    cl_predict = 1 if cl_predict_proba >= cl_threshold else 0

    if ad_predict == cl_predict:
        return (ad_predict, 'unanimous')

    # The models disagree: move the classifier threshold towards the other verdict.
    if cl_predict == 0:
        # Classifier said genuine -> lower the bar for calling fraud
        relaxed_threshold = cl_threshold - (cl_threshold * arbitrate_threshold)
    else:
        # Classifier said fraud -> raise the bar for calling fraud
        relaxed_threshold = (1 - cl_threshold) * arbitrate_threshold + cl_threshold

    cl_predict2 = 1 if cl_predict_proba >= relaxed_threshold else 0

    if ad_predict == cl_predict2:
        return (ad_predict, 'arbitrated')
    if default_model == 'cl':
        return (cl_predict, 'unsettled')
    if default_model == 'ad':
        return (ad_predict, 'unsettled')

    raise ValueError("default_model must be 'cl' or 'ad', got {!r}".format(default_model))


def arbitrate(predict_df, cl_threshold, arbitrate_threshold, default_model = 'cl'):
    """Apply arb() to every row of a prediction frame.

    Parameters
    ----------
    predict_df : DataFrame with the columns AD_FRAUD_PROBABILITY (0/1 verdict of
        the anomaly detector) and CL_FRAUD_PROBABILITY (classifier probability).
    cl_threshold, arbitrate_threshold, default_model : forwarded to arb().

    Returns
    -------
    A copy of `predict_df` with two added columns:
        DUAL_FRAUD_PROBABILITY : combined 0/1 prediction,
        DUAL_ARBITRATION       : 'unanimous', 'arbitrated' or 'unsettled'.
    The input frame is not modified.
    """
    dual_predictions = []
    reasons = []

    # Zipping the two columns avoids iterrows(), which builds a Series for every row
    # and is very slow on a test set with hundreds of thousands of transactions.
    for ad_predict, cl_proba in zip(predict_df['AD_FRAUD_PROBABILITY'], predict_df['CL_FRAUD_PROBABILITY']):
        prediction, reason = arb(ad_predict, cl_proba, cl_threshold, arbitrate_threshold, default_model)
        dual_predictions.append(int(prediction))
        reasons.append(reason)

    pred_df = predict_df.copy()
    pred_df['DUAL_FRAUD_PROBABILITY'] = dual_predictions
    pred_df['DUAL_ARBITRATION'] = reasons

    return pred_df

    
def train_and_test_classifier(trans_df, cv_inputs):
    """Train the supervised classifier on one cross-validation entry and score the test split.

    Parameters
    ----------
    trans_df : transaction DataFrame to split.
    cv_inputs : dict with the mandatory keys 'train_date_range' and
        'test_date_range', and the optional key 'classifier_hyper_params'
        (missing or None means the classifier's default parameters).

    Returns
    -------
    (X_train, X_test, y_train, y_test, classifier, y_test_df, scaler) where
    y_test_df carries an added CL_FRAUD_PROBABILITY column holding the predicted
    probability of the positive class.
    """
    # Classifier training: fraudulent and genuine rows are both used.
    # The frame passed by the caller is used; the previous version ignored the
    # `trans_df` parameter and read the notebook-global `trans` instead.
    X_train, X_test, y_train, y_test_df, scaler = \
        train_test_split_and_scale(trans_df, cv_inputs['train_date_range'], cv_inputs['test_date_range'], scaler=None, genuine_only=False)

    # Work on an independent frame so that adding the prediction column below never
    # writes into a slice of the transaction frame.
    y_test_df = y_test_df.copy()
    y_test = y_test_df[TARGET]

    # An empty dict makes the factory fall back to the library defaults; calling the
    # factory with no argument at all would raise TypeError.
    cl_hyper_params = cv_inputs.get('classifier_hyper_params') or {}
    classifier = create_classifier(cl_hyper_params)

    classifier.fit(X_train, y_train)

    # Column 1 of predict_proba is the probability of the positive (fraud) class
    prediction_probas = classifier.predict_proba(X_test)[:, 1]
    y_test_df['CL_FRAUD_PROBABILITY'] = prediction_probas

    return X_train, X_test, y_train, y_test, classifier, y_test_df, scaler


def train_and_test_anomaly_detector(trans_df, cv_inputs):
    """Train the anomaly detector on genuine transactions only and score the test split.

    Parameters
    ----------
    trans_df : transaction DataFrame to split.
    cv_inputs : dict with the mandatory keys 'train_date_range' and
        'test_date_range', and the optional key 'anomaly_detector_hyper_params'
        (missing or None means the detector's default parameters).

    Returns
    -------
    (X_train, X_test, y_train, y_test, anomaly_detector, y_test_df, scaler) where
    y_test_df carries an added AD_FRAUD_PROBABILITY column. Despite its name the
    column holds hard 0/1 verdicts: 1 where the detector predicts -1, 0 otherwise.
    """
    # Anomaly detector training: fraudulent rows are removed from the training split.
    # The frame passed by the caller is used; the previous version ignored the
    # `trans_df` parameter and read the notebook-global `trans` instead.
    X_train, X_test, y_train, y_test_df, scaler = \
        train_test_split_and_scale(trans_df, cv_inputs['train_date_range'], cv_inputs['test_date_range'], scaler=None, genuine_only=True)

    # Work on an independent frame so that adding the prediction column below never
    # writes into a slice of the transaction frame.
    y_test_df = y_test_df.copy()
    y_test = y_test_df[TARGET]

    # Remove the features excluded for the anomaly detector (no-op while the list is empty)
    X_train = X_train.drop(columns = ANOMALY_DETECTOR_FEATURE_EXCLUSION_LIST)
    X_test = X_test.drop(columns = ANOMALY_DETECTOR_FEATURE_EXCLUSION_LIST)

    # An empty dict makes the factory fall back to the library defaults; calling the
    # factory with no argument at all would raise TypeError.
    ad_hyper_params = cv_inputs.get('anomaly_detector_hyper_params') or {}
    anomaly_detector = create_anomaly_detector(ad_hyper_params)

    anomaly_detector.fit(X_train)

    # Map the detector's -1 label to fraud (1) and everything else to genuine (0)
    raw_predictions = anomaly_detector.predict(X_test)
    y_test_df['AD_FRAUD_PROBABILITY'] = [ 1 if p == -1 else 0 for p in raw_predictions ]

    return X_train, X_test, y_train, y_test, anomaly_detector, y_test_df, scaler


## Execution
# Runs one training/evaluation round per entry of CROSS_VALIDATION_INPUTS and collects
# (prediction_df, metrics_df, classifier, anomaly_detector, cl_scaler, ad_scaler)
# for every round in predicts_metrics.
# NOTE: the names assigned inside the loop are notebook globals and keep the values of
# the LAST round after the loop ends; later cells read some of them (e.g. y_test).
predicts_metrics = []

for i, cv_inputs in enumerate(CROSS_VALIDATION_INPUTS):
    print('\n==============================================')
    print('Dual model training round', i+1, 'of', len(CROSS_VALIDATION_INPUTS))
    print('==============================================\n')
    
    # Supervised classifier, trained on all transactions of the training window
    print('Train and evaluate fraud detector...')
    X_train, X_test, y_train, y_test, classifier, cl_pred_df, cl_scaler = train_and_test_classifier(trans, cv_inputs)
    print('---- Classifier columns - train ----')
    print(X_train.dtypes)
    print('--- Classifier columns - test ----')
    print(X_test.dtypes)
    print('-----------------------------------------')
    
    # Anomaly detector, trained on genuine transactions only.
    # This call rebinds X_train / X_test / y_train / y_test, replacing the classifier's versions.
    print('Train and evaluate anomaly detector...')
    X_train, X_test, y_train, y_test, anomaly_detector, ad_pred_df, ad_scaler = train_and_test_anomaly_detector(trans, cv_inputs)
    print('---- Anomaly Detector columns - train ----')
    print(X_train.dtypes)
    print('---- Anomaly Detector columns - test ----')
    print(X_test.dtypes)
    print('-----------------------------------------')
    
    # Merge both models' outputs into one frame; the column assignment aligns on the index
    prediction_df = cl_pred_df.copy()
    prediction_df['AD_FRAUD_PROBABILITY'] = ad_pred_df['AD_FRAUD_PROBABILITY']
    
    # Show every metric per threshold so that a threshold can be chosen based on the metrics
    # that matter most. Candidate thresholds are the classifier probabilities rounded to one decimal.
    print('Thresholds and corresponding metrics. Should choose a threshold based on values of metrics.')
    metrics_df = threshold_based_metrics(prediction_df.CL_FRAUD_PROBABILITY, prediction_df.TX_FRAUD, np.round(prediction_df.CL_FRAUD_PROBABILITY, decimals = 1))
    # display() is IPython's rich-display helper, available implicitly inside a notebook kernel
    display(metrics_df)
        
    
    # Fixed settings used for the reference run printed below
    classifier_threshold = 0.7
    arbitrate_threshold = 0.5
    unsettled_default = 'cl'  # classifier
    
    print('FOR REFERENCE ONLY:')
    print('Classifier threshold', classifier_threshold, '\tArbitrate threshold', arbitrate_threshold, '\tUnsettled default', unsettled_default)
    
    # Adds the DUAL_FRAUD_PROBABILITY and DUAL_ARBITRATION columns
    prediction_df = arbitrate(prediction_df, classifier_threshold, arbitrate_threshold, unsettled_default)
    
    print('Classifier, threshold:', classifier_threshold)
    pre, re = calculate_precision_recall(y_test, prediction_df.CL_FRAUD_PROBABILITY, classifier_threshold)
    print('Precision', pre, '\t\tRecall', re)
    
    # The anomaly detector and dual-model columns hold 0/1 verdicts, so the default
    # threshold of 0.5 simply maps them back to themselves
    print('Anomaly Detector')
    pre, re = calculate_precision_recall(y_test, prediction_df.AD_FRAUD_PROBABILITY)
    print('Precision', pre, '\t\tRecall', re)

    print('Dual Model Detector')
    pre, re = calculate_precision_recall(y_test, prediction_df.DUAL_FRAUD_PROBABILITY)
    print('Precision', pre, '\t\tRecall', re)

    predicts_metrics.append((prediction_df, metrics_df, classifier, anomaly_detector, cl_scaler, ad_scaler))


Dual model training round 1 of 1

Train and evaluate fraud detector...




---- Classifier columns - train ----
TX_AMOUNT                                           float64
TX_LAST_HOURS                                       float64
TX_LAST_DAYS                                        float64
CUSTOMER_ID_AVG_AMOUNT_1DAY_WINDOW                  float64
CUSTOMER_ID_AVG_AMOUNT_7DAY_WINDOW                  float64
CUSTOMER_ID_AVG_AMOUNT_30DAY_WINDOW                 float64
CUSTOMER_ID_AVG_AMOUNT_2REC                         float64
CUSTOMER_ID_AVG_AMOUNT_10REC                        float64
TERMINAL_ID_RISK_2DAY_WINDOW                        float64
TERMINAL_ID_RISK_7DAY_WINDOW                        float64
SUM_TX_AMOUNT_CUSOMTER_ID_SAME_TERMINAL_SAME_DAY    float64
nb_TX_CUSOMTER_ID_SAME_TERMINAL_SAME_DAY            float64
AMOUNT_Z_SCORE                                      float64
CUSTOMER_TERMINAL_DISTANCE_Z_SCORE                  float64
TX_TIME_HOUR_BIN_0                                    int64
TX_TIME_HOUR_BIN_1                                    int64
TX_

Unnamed: 0,Threshold,Accuracy,MME,TPR,TNR,FPR,FNR,BER,G-mean,Precision,NPV,FDR,FOR,F1 Score
0,1.0,0.983236,0.016764,0.0,1.0,0.0,1.0,0.5,0.0,0.0,0.983236,0.0,0.016764,0.0
1,0.9,0.994478,0.005522,0.683157,0.999786,0.000214,0.316843,0.158529,0.826444,0.981941,0.994626,0.018059,0.005374,0.805742
2,0.8,0.995146,0.004854,0.727326,0.999712,0.000288,0.272674,0.136481,0.852712,0.977315,0.995371,0.022685,0.004629,0.83399
3,0.7,0.996173,0.003827,0.800353,0.999511,0.000489,0.199647,0.100068,0.894406,0.965427,0.996606,0.034573,0.003394,0.875174
4,0.6,0.996301,0.003699,0.810758,0.999464,0.000536,0.189242,0.094889,0.90018,0.962704,0.996782,0.037296,0.003218,0.880222
5,0.5,0.996485,0.003515,0.825285,0.999404,0.000596,0.174715,0.087656,0.908181,0.959379,0.997028,0.040621,0.002972,0.887294
6,0.4,0.996541,0.003459,0.846486,0.9991,0.0009,0.153514,0.077207,0.919633,0.941279,0.997387,0.058721,0.002613,0.89137
7,0.3,0.996512,0.003488,0.864154,0.998768,0.001232,0.135846,0.068539,0.929026,0.922851,0.997686,0.077149,0.002314,0.892539
8,0.2,0.995985,0.004015,0.873773,0.998069,0.001931,0.126227,0.064079,0.933855,0.885243,0.997848,0.114757,0.002152,0.87947
9,0.1,0.995445,0.004555,0.888496,0.997269,0.002731,0.111504,0.057117,0.941313,0.847248,0.998097,0.152752,0.001903,0.867382


FOR REFERENCE ONLY:
Classifier threshold 0.7 	Arbitrate threshold 0.5 	Unsettled default cl
Classifier, threshold: 0.7
[4077, 146]
[1017, 298634]
Precision 0.9654274212645039 		Recall 0.8003533568904594
Anomaly Detector
[4538, 6018]
[556, 292762]
Precision 0.4298976885183782 		Recall 0.8908519827247743
Dual Model Detector
[4312, 330]
[782, 298450]
Precision 0.9289099526066351 		Recall 0.8464860620337652


**Save results to hard disk**

In [9]:
# Save Round 1 results: the prediction and metrics frames as CSV, both scalers via
# joblib and both fitted models via pickle, all under ./dual_model_results
prediction_df, metrics_df, classifier, anomaly_detector, cl_scaler, ad_scaler = \
    predicts_metrics[0]

import os

# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race
os.makedirs('./dual_model_results', exist_ok=True)

prediction_df.to_csv('./dual_model_results/prediction.csv', index=False)
metrics_df.to_csv('./dual_model_results/metrics.csv', index=False)

import joblib
joblib.dump(cl_scaler, './dual_model_results/classifier_scaler.bin', compress=True)
joblib.dump(ad_scaler, './dual_model_results/anomaly_detector_scaler.bin', compress=True)

import pickle
# Context managers flush and close the files deterministically; a bare open() passed
# to pickle.dump leaves the handle open until it is garbage collected.
with open('./dual_model_results/classifier_model.pkl', 'wb') as model_file:
    pickle.dump(classifier, model_file)
with open('./dual_model_results/anomaly_detector_model.pkl', 'wb') as model_file:
    pickle.dump(anomaly_detector, model_file)


**Load results from hard disk**

In [10]:
# Load Round 1 results written by the save cell above
prediction_df = pd.read_csv('./dual_model_results/prediction.csv')
metrics_df = pd.read_csv('./dual_model_results/metrics.csv')

import joblib
cl_scaler = joblib.load('./dual_model_results/classifier_scaler.bin')
ad_scaler = joblib.load('./dual_model_results/anomaly_detector_scaler.bin')

import pickle
# SECURITY: unpickling executes arbitrary code embedded in the file. Only load model
# files that this notebook produced itself; never load files from an untrusted source.
# Context managers close the files deterministically after loading.
with open('./dual_model_results/classifier_model.pkl', 'rb') as model_file:
    classifier = pickle.load(model_file)
with open('./dual_model_results/anomaly_detector_model.pkl', 'rb') as model_file:
    anomaly_detector = pickle.load(model_file)


**Analyze results**

In [11]:

def analyze_dual_model_result(input_prediction_df):
    """Print precision/recall of each detector over a grid of arbitration settings.

    For every combination of classifier threshold (0.3 to 0.7) and arbitrate
    threshold (0.4 to 0.6) the frame is re-arbitrated with the classifier as the
    default for unsettled cases. For each combination the function prints the
    confusion matrix, precision and recall of the classifier, of the anomaly
    detector and of the dual model, followed by the count and percentage of
    unanimous / arbitrated / unsettled decisions.

    Parameters
    ----------
    input_prediction_df : DataFrame with the columns TX_FRAUD (true label),
        CL_FRAUD_PROBABILITY and AD_FRAUD_PROBABILITY. It is not modified.
    """
    # The ground truth comes from the frame itself rather than from a notebook-global
    # `y_test`, so the analysis also works on predictions reloaded in a fresh kernel.
    true_labels = input_prediction_df.TX_FRAUD

    outcome_order = ['unanimous', 'arbitrated', 'unsettled']

    for classifier_threshold in [0.3, 0.4, 0.5, 0.6, 0.7]:
        for arbitrate_threshold in [0.4, 0.5, 0.6]:
            unsettled_default = 'cl'
            print('Classifier threshold', classifier_threshold, '\tArbitrate threshold', arbitrate_threshold, '\tUnsettled default', unsettled_default)

            prediction_df = arbitrate(input_prediction_df, classifier_threshold, arbitrate_threshold, unsettled_default)

            # --------------------------------------------------
            # This part is the same regardless of unsettled_default
            print('* Classifier, threshold:', classifier_threshold)
            pre, re = calculate_precision_recall(true_labels, prediction_df.CL_FRAUD_PROBABILITY, classifier_threshold)
            print('Precision', round(pre,2), '\t\tRecall', round(re,2))

            print('* Anomaly Detector')
            pre, re = calculate_precision_recall(true_labels, prediction_df.AD_FRAUD_PROBABILITY)
            print('Precision', round(pre,2), '\t\tRecall', round(re,2))
            # --------------------------------------------------

            print('** Dual Model Detector - Using Classifier for unsettled cases')
            pre, re = calculate_precision_recall(true_labels, prediction_df.DUAL_FRAUD_PROBABILITY)
            print('Precision', round(pre,2), '\t\tRecall', round(re,2))

            # --------------------------------------------------
            # This part is the same regardless of unsettled_default
            # reindex() guarantees all three outcomes are present (count 0 when an outcome
            # never occurs); attribute/column lookups would otherwise raise for it.
            reasons = prediction_df.DUAL_ARBITRATION.value_counts().reindex(outcome_order, fill_value=0)

            test_case_count = reasons.sum()
            unanimous_pct = round(100 * reasons['unanimous'] / test_case_count, 2)
            arbitrated_pct = round(100 * reasons['arbitrated'] / test_case_count, 2)
            unsettled_pct = round(100 * reasons['unsettled'] / test_case_count, 2)

            print( pd.DataFrame(reasons).transpose() )
            print('Percentage           ', unanimous_pct, ' '*6, arbitrated_pct, ' '*5, unsettled_pct)
            print()
            # --------------------------------------------------


In [12]:
# Analyze the Round 1 predictions (prediction_df as saved/loaded by the cells above)
# over the grid of classifier and arbitration thresholds defined in the function.

analyze_dual_model_result(prediction_df)

Classifier threshold 0.3 	Arbitrate threshold 0.4 	Unsettled default cl
* Classifier, threshold: 0.3
[4402, 368]
[692, 298412]
Precision 0.92 		Recall 0.86
* Anomaly Detector
[4538, 6018]
[556, 292762]
Precision 0.43 		Recall 0.89
** Dual Model Detector - Using Classifier for unsettled cases
[4444, 571]
[650, 298209]
Precision 0.89 		Recall 0.87
                  unanimous  arbitrated  unsettled
DUAL_ARBITRATION     297874         317       5683
Percentage            98.03        0.1       1.87

Classifier threshold 0.3 	Arbitrate threshold 0.5 	Unsettled default cl
* Classifier, threshold: 0.3
[4402, 368]
[692, 298412]
Precision 0.92 		Recall 0.86
* Anomaly Detector
[4538, 6018]
[556, 292762]
Precision 0.43 		Recall 0.89
** Dual Model Detector - Using Classifier for unsettled cases
[4463, 644]
[631, 298136]
Precision 0.87 		Recall 0.88
                  unanimous  arbitrated  unsettled
DUAL_ARBITRATION     297874         427       5573
Percentage            98.03        0.14       1.8