In [3]:
import os
import sys
sys.path.append('../../')

In [163]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler
import math

In [164]:
from src.utils import get_db_con
from src.analysis_functions import (
    get_all_decisions, groups, schemas, users, dps, dt, pdr, group_order, colors, 
    assign_conf_mat_cell, _modify_value_and_time
)

In [5]:
cred_file = '../../conf/credentials.yaml'
conn = get_db_con(cred_file)

#### Parameter initialization

In [8]:
params={
    'fn': -3, 
    'p_loss_trx': 0.5, # prob of losing this transaction
    'cust_worth': 3, # The long term customer worth as a coefficiant of the trx val
    'p_loss_cust': 0.1, # probablity of losing the customer due to the rejection
    'p_return_cust': 1, # probability of the customer creating future worth as we expect
    'suspicious_add_time': 600
}

#### Fetching all decisions

In [9]:
%%capture
all_decisions = get_all_decisions(conn, schemas, users, groups)
all_decisions['conf_cell'] = assign_conf_mat_cell(all_decisions, suspicious_strategy='correct') 

all_decisions = _modify_value_and_time(all_decisions, params, 'correct')

msk = all_decisions['trx_amnt'] > 0 

all_decisions = all_decisions[msk]

all_decisions['potential_revenue'] = all_decisions.apply(
    lambda x: x['trx_amnt'] * (1 + (params['cust_worth'] * params['p_return_cust'])) if x['label']==0 else 0, 
    axis=1
)

In [160]:
all_decisions['accuracy'] = all_decisions.apply(
    lambda x: 1 if x['conf_cell'] in {'tp', 'tn'} else 0,
    axis=1
)

all_decisions['fn'] = all_decisions.apply(
    lambda x: 1 if x['conf_cell'] in {'fn'} else 0,
    axis=1
)

all_decisions['fp'] = all_decisions.apply(
    lambda x: 1 if x['conf_cell'] in {'fp'} else 0,
    axis=1
)

all_decisions['approved'] = all_decisions.apply(
    lambda x: 1 if x['decision'] in {'approved'} else 0,
    axis=1
)

all_decisions['declined'] = all_decisions.apply(
    lambda x: 1 if x['decision'] in {'declined'} else 0,
    axis=1
)

all_decisions['suspicious'] = all_decisions.apply(
    lambda x: 1 if x['decision'] in {'suspicious'} else 0,
    axis=1
)

In [191]:
all_decisions['group_exp_combined'] = all_decisions.apply(
    lambda x: 'Explainer' if x['group'] in {'TreeInt', 'LIME', 'TreeSHAP'} else x['group'],
    axis=1
)

In [192]:
all_decisions

Unnamed: 0,xplz_id,group,user_name,trx_amnt,decision,decision_time,label,conf_cell,conf_mat,decision_value,modified_time,potential_revenue,accuracy,fn,fp,approved,declined,suspicious,explainer,group_exp_combined
0,0005e73512cc6b51164de77a492947cb,ML Model,j,206.86,approved,24,0.0,tn,tn,827.44,24,827.44,1,0,0,1,0,0,0,ML Model
1,00199cf0e179eee21bbc464dfdd26695,Irrelevant,n,102.69,approved,126,1.0,fn,fn,-308.07,126,0.00,0,1,0,1,0,0,0,Irrelevant
2,0023ade89e3b6dedae14bced27d2e02a,Data,n,88.78,approved,61,0.0,tn,tn,355.12,61,355.12,1,0,0,1,0,0,0,Data
3,0053a4cc33ce725779c115d4c8fe392b,ML Model,j,137.94,approved,8,0.0,tn,tn,551.76,8,551.76,1,0,0,1,0,0,0,ML Model
4,005ba4ef23b2226267fe9ab620fe1d10,Irrelevant,t,190.00,approved,39,0.0,tn,tn,760.00,39,760.00,1,0,0,1,0,0,0,Irrelevant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3475,ff77551516ec6697d57a3cd27b0f3a83,TreeInt,j,128.00,approved,4,0.0,tn,tn,512.00,4,512.00,1,0,0,1,0,0,1,Explainer
3476,ff870623958495f4db39fdc9410e379f,Random,t,800.00,approved,19,0.0,tn,tn,3200.00,19,3200.00,1,0,0,1,0,0,0,Random
3477,ff90b767a3da182e3399ee8ad856fa12,TreeInt,n,353.31,approved,60,0.0,tn,tn,1413.24,60,1413.24,1,0,0,1,0,0,1,Explainer
3479,fff66c84678648b09168e4a635f276f3,TreeInt,t,319.31,declined,45,1.0,tp,tp,0.00,45,0.00,1,0,0,0,1,0,1,Explainer


## Linear Regression for Time


- We include only the rows pertaining to a specific hypotheses (e.g., ML vs Data has only rows from ML model and Data variants)
- We control for the user, trasaction dollar value, and use the variant as the treatment
- Output --> Modified decision time that takes into account the "suspicious time penalty"
- Coefficients and p-values corroborated with our findings
    - ML model is faster than Data (coef=-27.8459, p=.001)
    - SHAP slower than ML (coeff= 15.8477, p=0.02)
    - TreeInt slower than ML (coeff= 14.2354, p=0.03)
    - Combined explainer slower than ML (coeff=12.0946, p=.03)

In [214]:
def fit_linear_model_for_pairwise_test(all_decisions, variants_to_compare, output_column, variant_column='group'):
    relevant_features = ['user_name', 'trx_amnt', variant_column , output_column]
    
    group1 = variants_to_compare[0]
    group2 = variants_to_compare[1]
    
    row_filter = (all_decisions[variant_column] == group1) | (all_decisions[variant_column] == group2)
    
    df = all_decisions[row_filter][relevant_features]
    dummified = pd.get_dummies(df)
    
    y = dummified[output_column]
    X = dummified.drop(columns=output_column)
    
    # Dropping the control group as it is redundant
    X = X.drop(variant_column+'_'+group2, axis=1)
    
    # Dropping users to see what happenes
    # X = X.drop('user_name_n', axis=1)
    # X = X.drop('user_name_j', axis=1)
    # X = X.drop('user_name_t', axis=1)

    scaler = MinMaxScaler().fit(X)
    x_scaled = scaler.transform(X)
    
    x_scaled = pd.DataFrame(x_scaled, columns=X.columns, index=X.index)
    
    x_scaled = sm.add_constant(x_scaled)
        
    mod = sm.OLS(y, x_scaled)
    results = mod.fit()
    
    return results    

#### ML vs Data

In [209]:
res = fit_linear_model_for_pairwise_test(all_decisions, ('ML Model', 'Data'), 'modified_time')
res.summary()

0,1,2,3
Dep. Variable:,modified_time,R-squared:,0.04
Model:,OLS,Adj. R-squared:,0.036
Method:,Least Squares,F-statistic:,10.18
Date:,"Mon, 06 Feb 2023",Prob (F-statistic):,4.46e-08
Time:,18:09:46,Log-Likelihood:,-6209.7
No. Observations:,991,AIC:,12430.0
Df Residuals:,986,BIC:,12450.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,58.3959,4.622,12.635,0.000,49.327,67.465
trx_amnt,56.8273,77.832,0.730,0.465,-95.907,209.562
user_name_j,-10.9484,5.924,-1.848,0.065,-22.574,0.677
user_name_n,33.0025,5.964,5.533,0.000,21.298,44.707
user_name_t,36.3418,5.938,6.120,0.000,24.689,47.995
group_ML Model,-27.8459,8.118,-3.430,0.001,-43.776,-11.916

0,1,2,3
Omnibus:,864.46,Durbin-Watson:,2.069
Prob(Omnibus):,0.0,Jarque-Bera (JB):,15099.737
Skew:,4.217,Prob(JB):,0.0
Kurtosis:,20.163,Cond. No.,2860000000000000.0


#### SHAP vs ML

In [210]:
res = fit_linear_model_for_pairwise_test(all_decisions, ('TreeSHAP', 'ML Model'), 'modified_time')
res.summary()

0,1,2,3
Dep. Variable:,modified_time,R-squared:,0.036
Model:,OLS,Adj. R-squared:,0.031
Method:,Least Squares,F-statistic:,7.279
Date:,"Mon, 06 Feb 2023",Prob (F-statistic):,1.04e-06
Time:,18:09:52,Log-Likelihood:,-6047.8
No. Observations:,994,AIC:,12110.0
Df Residuals:,988,BIC:,12140.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-5.638e+13,9.66e+14,-0.058,0.953,-1.95e+15,1.84e+15
trx_amnt,117.6316,69.278,1.698,0.090,-18.318,253.581
user_name_j,5.638e+13,9.66e+14,0.058,0.953,-1.84e+15,1.95e+15
user_name_n,5.638e+13,9.66e+14,0.058,0.953,-1.84e+15,1.95e+15
user_name_t,5.638e+13,9.66e+14,0.058,0.953,-1.84e+15,1.95e+15
group_TreeSHAP,15.8477,6.779,2.338,0.020,2.545,29.151

0,1,2,3
Omnibus:,980.257,Durbin-Watson:,1.959
Prob(Omnibus):,0.0,Jarque-Bera (JB):,28676.613
Skew:,4.847,Prob(JB):,0.0
Kurtosis:,27.463,Cond. No.,730000000000000.0


#### LIME vs ML

In [211]:
res = fit_linear_model_for_pairwise_test(all_decisions, ('LIME', 'ML Model'), 'modified_time')
res.summary()

0,1,2,3
Dep. Variable:,modified_time,R-squared:,0.051
Model:,OLS,Adj. R-squared:,0.047
Method:,Least Squares,F-statistic:,13.38
Date:,"Mon, 06 Feb 2023",Prob (F-statistic):,1.28e-10
Time:,18:09:52,Log-Likelihood:,-5991.0
No. Observations:,995,AIC:,11990.0
Df Residuals:,990,BIC:,12020.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,36.4639,3.718,9.808,0.000,29.169,43.759
trx_amnt,81.9302,61.607,1.330,0.184,-38.966,202.826
user_name_j,-18.5341,4.631,-4.002,0.000,-27.622,-9.446
user_name_n,20.7961,4.649,4.473,0.000,11.672,29.920
user_name_t,34.2019,4.675,7.315,0.000,25.027,43.377
group_LIME,6.2565,6.337,0.987,0.324,-6.180,18.693

0,1,2,3
Omnibus:,1043.944,Durbin-Watson:,2.056
Prob(Omnibus):,0.0,Jarque-Bera (JB):,39272.629
Skew:,5.247,Prob(JB):,0.0
Kurtosis:,31.934,Cond. No.,1400000000000000.0


#### TreeInt vs ML

In [216]:
res = fit_linear_model_for_pairwise_test(all_decisions, ('TreeInt', 'ML Model'), 'modified_time')
res.summary()

0,1,2,3
Dep. Variable:,modified_time,R-squared:,0.054
Model:,OLS,Adj. R-squared:,0.051
Method:,Least Squares,F-statistic:,14.21
Date:,"Mon, 06 Feb 2023",Prob (F-statistic):,2.8e-11
Time:,18:17:50,Log-Likelihood:,-6024.6
No. Observations:,993,AIC:,12060.0
Df Residuals:,988,BIC:,12080.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,4.99e+14,1.21e+15,0.414,0.679,-1.87e+15,2.87e+15
trx_amnt,91.3023,68.178,1.339,0.181,-42.488,225.093
user_name_j,-4.99e+14,1.21e+15,-0.414,0.679,-2.87e+15,1.87e+15
user_name_n,-4.99e+14,1.21e+15,-0.414,0.679,-2.87e+15,1.87e+15
user_name_t,-4.99e+14,1.21e+15,-0.414,0.679,-2.87e+15,1.87e+15
group_TreeInt,14.2354,6.644,2.143,0.032,1.198,27.273

0,1,2,3
Omnibus:,1014.275,Durbin-Watson:,2.058
Prob(Omnibus):,0.0,Jarque-Bera (JB):,33941.431
Skew:,5.07,Prob(JB):,0.0
Kurtosis:,29.786,Cond. No.,927000000000000.0


In [215]:
res = fit_linear_model_for_pairwise_test(all_decisions, ('Explainer', 'ML Model'), 'modified_time', 'group_exp_combined')
res.summary()

0,1,2,3
Dep. Variable:,modified_time,R-squared:,0.042
Model:,OLS,Adj. R-squared:,0.04
Method:,Least Squares,F-statistic:,21.9
Date:,"Mon, 06 Feb 2023",Prob (F-statistic):,1.04e-17
Time:,18:10:52,Log-Likelihood:,-12118.0
No. Observations:,1988,AIC:,24250.0
Df Residuals:,1983,BIC:,24270.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,35.4005,3.870,9.148,0.000,27.811,42.990
trx_amnt,123.6949,53.372,2.318,0.021,19.024,228.366
user_name_j,-18.0578,3.639,-4.962,0.000,-25.195,-10.921
user_name_n,27.9729,3.650,7.664,0.000,20.815,35.131
user_name_t,25.4854,3.652,6.978,0.000,18.323,32.648
group_exp_combined_Explainer,12.0946,5.570,2.171,0.030,1.171,23.018

0,1,2,3
Omnibus:,1951.518,Durbin-Watson:,1.992
Prob(Omnibus):,0.0,Jarque-Bera (JB):,61323.358
Skew:,4.949,Prob(JB):,0.0
Kurtosis:,28.344,Cond. No.,4200000000000000.0


In [207]:
row_filter = (all_decisions['group_exp_combined'] == 'ML Model')

all_decisions[row_filter]

Unnamed: 0,xplz_id,group,user_name,trx_amnt,decision,decision_time,label,conf_cell,conf_mat,decision_value,modified_time,potential_revenue,accuracy,fn,fp,approved,declined,suspicious,explainer,group_exp_combined
0,0005e73512cc6b51164de77a492947cb,ML Model,j,206.86,approved,24,0.0,tn,tn,827.44,24,827.44,1,0,0,1,0,0,0,ML Model
3,0053a4cc33ce725779c115d4c8fe392b,ML Model,j,137.94,approved,8,0.0,tn,tn,551.76,8,551.76,1,0,0,1,0,0,0,ML Model
14,0165ebd364a1611a07c537b03c254032,ML Model,t,160.00,approved,16,0.0,tn,tn,640.00,16,640.00,1,0,0,1,0,0,0,ML Model
17,019ae01437ba3d10e7e57682e519adee,ML Model,j,138.16,approved,15,0.0,tn,tn,552.64,15,552.64,1,0,0,1,0,0,0,ML Model
27,0256b122ccf672affd74d8e748e8fb0f,ML Model,t,94.79,approved,30,0.0,tn,tn,379.16,30,379.16,1,0,0,1,0,0,0,ML Model
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3431,fc449bb225b1419d0a9428cfc22e565e,ML Model,j,297.69,approved,10,0.0,tn,tn,1190.76,10,1190.76,1,0,0,1,0,0,0,ML Model
3433,fc72128fadfcd54742496d5fb60abec0,ML Model,t,103.88,approved,13,0.0,tn,tn,415.52,13,415.52,1,0,0,1,0,0,0,ML Model
3450,fddaec4356dbaba60c41ddd5a3e13954,ML Model,j,201.88,approved,12,0.0,tn,tn,807.52,12,807.52,1,0,0,1,0,0,0,ML Model
3455,fe23f60dfeec279fb2565589e34784f6,ML Model,j,374.50,approved,6,0.0,tn,tn,1498.00,6,1498.00,1,0,0,1,0,0,0,ML Model


In [137]:
res = fit_linear_model_for_pairwise_test(all_decisions, ('Random', 'ML Model'), 'modified_time')
res.summary()

0,1,2,3
Dep. Variable:,modified_time,R-squared:,0.065
Model:,OLS,Adj. R-squared:,0.061
Method:,Least Squares,F-statistic:,17.05
Date:,"Mon, 06 Feb 2023",Prob (F-statistic):,1.58e-13
Time:,16:27:18,Log-Likelihood:,-5851.9
No. Observations:,993,AIC:,11710.0
Df Residuals:,988,BIC:,11740.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.549e+13,1.01e+15,-0.025,0.980,-2e+15,1.95e+15
trx_amnt,125.6812,59.319,2.119,0.034,9.275,242.088
user_name_j,2.549e+13,1.01e+15,0.025,0.980,-1.95e+15,2e+15
user_name_n,2.549e+13,1.01e+15,0.025,0.980,-1.95e+15,2e+15
user_name_t,2.549e+13,1.01e+15,0.025,0.980,-1.95e+15,2e+15
group_Random,4.6203,5.603,0.825,0.410,-6.375,15.616

0,1,2,3
Omnibus:,1125.671,Durbin-Watson:,2.061
Prob(Omnibus):,0.0,Jarque-Bera (JB):,62227.288
Skew:,5.795,Prob(JB):,0.0
Kurtosis:,40.009,Cond. No.,922000000000000.0


## Confusion Matrix

Ran spot checks for accuracy, FPR, and FNR (can do that by changing the column) for ML vs Data, and Explainer vs Model.

Corroborates our findings:


In [143]:
def fit_logit_model_for_pairwise_test(all_decisions, variants_to_compare, column):
    relevant_features = ['user_name', 'group', column, 'trx_amnt']
    
    group1 = variants_to_compare[0]
    group2 = variants_to_compare[1]
    
    row_filter = (all_decisions['group'] == group1) | (all_decisions['group'] == group2)
    
    df = all_decisions[row_filter][relevant_features]
    dummified = pd.get_dummies(df)
    
    y = dummified[column]
    X = dummified.drop(columns=column)
    
    # Dropping the control group as it is redundant
    X = X.drop('group_'+group2, axis=1)
    
    # Dropping users to see what happenes
    # X = X.drop('user_name_n', axis=1)
    # X = X.drop('user_name_j', axis=1)
    # X = X.drop('user_name_t', axis=1)

    scaler = MinMaxScaler().fit(X)
    x_scaled = scaler.transform(X)
    
    x_scaled = pd.DataFrame(x_scaled, columns=X.columns, index=X.index)
    
    x_scaled = sm.add_constant(x_scaled)
        
    mod = sm.Logit(y, x_scaled)
    results = mod.fit()
    
    return results 

#### Finding in paper -- ML models improved accuracy significantly with p = 0.03

In [162]:
res = fit_logit_model_for_pairwise_test(all_decisions, ('ML Model', 'Data'), 'accuracy')
res.summary()

Optimization terminated successfully.
         Current function value: 0.507014
         Iterations 5


0,1,2,3
Dep. Variable:,accuracy,No. Observations:,991.0
Model:,Logit,Df Residuals:,986.0
Method:,MLE,Df Model:,4.0
Date:,"Mon, 06 Feb 2023",Pseudo R-squ.:,0.005405
Time:,17:35:27,Log-Likelihood:,-502.45
converged:,True,LL-Null:,-505.18
Covariance Type:,nonrobust,LLR p-value:,0.2432

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.8883,,,,,
trx_amnt,-0.0492,1.455,-0.034,0.973,-2.900,2.802
user_name_j,0.2708,,,,,
user_name_n,0.2272,,,,,
user_name_t,0.3902,,,,,
group_ML Model,0.3408,0.158,2.156,0.031,0.031,0.651


In [166]:
math.exp(0.3408)

1.4060719983391865

#### ML model rejected fewer transactions

In [168]:
res = fit_logit_model_for_pairwise_test(all_decisions, ('ML Model', 'Data'), 'declined')
res.summary()

Optimization terminated successfully.
         Current function value: 0.484052
         Iterations 6


0,1,2,3
Dep. Variable:,declined,No. Observations:,991.0
Model:,Logit,Df Residuals:,986.0
Method:,MLE,Df Model:,4.0
Date:,"Mon, 06 Feb 2023",Pseudo R-squ.:,0.02666
Time:,17:38:36,Log-Likelihood:,-479.7
converged:,True,LL-Null:,-492.83
Covariance Type:,nonrobust,LLR p-value:,2.783e-05

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.9726,1.71e+06,-5.7e-07,1.000,-3.34e+06,3.34e+06
trx_amnt,0.6189,1.366,0.453,0.651,-2.059,3.297
user_name_j,-0.8426,1.71e+06,-4.94e-07,1.000,-3.34e+06,3.34e+06
user_name_n,0.0922,1.71e+06,5.4e-08,1.000,-3.34e+06,3.34e+06
user_name_t,-0.2222,1.71e+06,-1.3e-07,1.000,-3.34e+06,3.34e+06
group_ML Model,-0.3453,0.162,-2.125,0.034,-0.664,-0.027


In [169]:
math.exp(-0.3453)

0.7080079192284888

#### ML model approved more transactions

In [170]:
res = fit_logit_model_for_pairwise_test(all_decisions, ('ML Model', 'Data'), 'approved')
res.summary()

Optimization terminated successfully.
         Current function value: 0.529770
         Iterations 6


0,1,2,3
Dep. Variable:,approved,No. Observations:,991.0
Model:,Logit,Df Residuals:,986.0
Method:,MLE,Df Model:,4.0
Date:,"Mon, 06 Feb 2023",Pseudo R-squ.:,0.03491
Time:,17:39:52,Log-Likelihood:,-525.0
converged:,True,LL-Null:,-543.99
Covariance Type:,nonrobust,LLR p-value:,1.133e-07

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.7412,,,,,
trx_amnt,-0.8356,1.310,-0.638,0.524,-3.404,1.733
user_name_j,0.8264,,,,,
user_name_n,-0.0972,,,,,
user_name_t,0.0120,,,,,
group_ML Model,0.5133,0.153,3.348,0.001,0.213,0.814


In [171]:
math.exp(0.5133)

1.67079573338226