In [None]:
###################
### Credentials ###
###################

import keyring
import runpy
import os
import urllib3
urllib3.disable_warnings()

### Connection settings come from the local helper script password_poc.py:
### hostname, output_dir and wd are imported from it by name
### (alternatively, define these values directly in this script).
from password_poc import hostname, output_dir, wd
### Execute password_poc.py as a script, in addition to the import above.
### NOTE(review): presumably this run is what stores the 'cas' entries in the
### keyring that are read below -- confirm against password_poc.py.
runpy.run_path(path_name='password_poc.py')
### Both lookups use the 'cas' keyring service: the user name is stored under
### the literal key 'username', the password under the user name itself.
username = keyring.get_password('cas', 'username')
password = keyring.get_password('cas', username)
### NOTE(review): presumably the folder for model metadata output; it is not
### referenced again in the visible cells.
metadata_output_dir = 'outputs'

###################
### Environment ###
###################

import swat
import pandas as pd

### Open the CAS session used by every later step.
### The client is pointed at the CA bundle expected in the working directory
### before the connection is created.
### NOTE(review): port 443 is combined with protocol='http' -- confirm whether
### 'https' is intended, or whether `hostname` already carries the scheme.
port = 443
os.environ['CAS_CLIENT_SSL_CA_LIST'] = '/'.join([str(wd), 'ca_cert_poc.pem'])
conn = swat.CAS(
    hostname,
    port,
    username=username,
    password=password,
    protocol='http',
)

### Print the connection object and the server status report.
print(conn)
print(conn.serverstatus())

#############################
### Identify Table in CAS ###
#############################

### caslib and table to use in modeling
caslib = 'Public'
in_mem_tbl = 'AML_BANK_PREP'

### Load the table only when tableExists reports it as not in memory
### (exists <= 0); the source file is <table name>.sashdat in the same caslib
### and the loaded table is promoted.
if conn.table.tableExists(caslib=caslib, name=in_mem_tbl).exists <= 0:
    conn.table.loadTable(
        caslib=caslib,
        path=in_mem_tbl + '.sashdat',
        casout={'name': in_mem_tbl, 'caslib': caslib, 'promote': True},
    )

### Show the table to verify. The result is printed explicitly: a bare
### expression in the middle of a notebook cell is evaluated but never
### rendered, so the original verification output was silently lost.
print(conn.table.tableInfo(caslib=caslib, wildIgnore=False, name=in_mem_tbl))

### Names of the tables written by the training and scoring actions,
### all derived from the input table name.
astore_tbl = in_mem_tbl + '_astore'
cas_score_tbl = in_mem_tbl + '_score'
cas_out_tbl = in_mem_tbl + '_model'

########################
### Create Dataframe ###
########################

### Client-side handle to the in-memory modeling table.
dm_inputdf = conn.CASTable(in_mem_tbl, caslib=caslib)

### Print the column metadata for review of the model parameters. The result
### is printed explicitly because a bare expression that is not the last
### statement of a notebook cell is never rendered.
print(conn.table.columnInfo(table={"caslib": caslib, "name": in_mem_tbl}))

########################
### Model Parameters ###
########################

# import python libraries
import numpy as np
import pandas as pd
from pathlib import Path
### Load the CAS action sets used below: training (decisionTree),
### scoring/describing the analytic store (astore) and bias assessment
### (fairAITools). The load order is unchanged.
for actionset_name in ('decisionTree', 'astore', 'fairAITools'):
    conn.loadactionset(actionset_name)

### Keyword arguments unpacked into the gbtreeTrain call.
xgb_params = {
    'm': 20,
    'seed': 12345,
    'nTree': 100,
    'learningRate': 0.1,
    'subSampleRate': 0.5,
    'lasso': 0,
    'ridge': 1,
    'distribution': "binary",
    'maxBranch': 2,
    'maxLevel': 5,
    'leafSize': 5,
    'missing': "useinsearch",
    'minUseInSearch': 1,
    'nBins': 50,
    'quantileBin': True,
}

### Settings passed as the earlyStop parameter of the gbtreeTrain call.
early_stop_params = {
    'metric': "MCR",
    'stagnation': 5,
    'tolerance': 0,
    'minimum': False,
    'threshold': 0,
    'thresholdIter': 0,
}

### Echo both parameter sets so they are recorded in the cell output.
print(xgb_params)
print(early_stop_params)

### Descriptive labels for the model (name, project, description, type).
model_name = "gradboost_cas"
project_name = "AML Risk Score"
description = "GradBoost CAS"
model_type = "gradient_boost"

### Column roles in the modeling table.
dm_dec_target = "ml_indicator"              # target column
dm_partitionvar = "analytic_partition"      # partition flag column
dm_key = "account_id"                       # key column
dm_classtarget_level = ["0", "1"]           # target levels, as strings

### Values of the partition column for each data split.
dm_partition_validate_val = 0
dm_partition_train_val = 1
dm_partition_test_val = 2

### Predictor columns that are excluded from the model inputs.
rejected_predictors = [
    "atm_deposit_indicator",
    "citizenship_country_risk",
    "distance_to_bank",
    "distance_to_employer",
    "income",
    "num_acctbal_chgs_gt2000",
    "occupation_risk",
]

### Column used as the sensitive variable in the bias assessment.
bias_var = "cross_border_trx_indicator"

##############################
### Final Modeling Columns ###
##############################

### Columns that must never be used as predictors: the role columns
### (target, partition flag, key) plus the explicitly rejected predictors.
### The role names are collected as a list directly; joining them with spaces
### and splitting again would break any column name containing whitespace.
macro_vars = [dm_dec_target, dm_partitionvar, dm_key]
rejected_vars = rejected_predictors + macro_vars

### Every excluded column has to exist in the table. Report all missing names
### at once instead of failing on the first one with list.remove's generic
### "x not in list" message (still a ValueError, as before).
available_columns = list(dm_inputdf.columns.values)
missing_columns = [col for col in rejected_vars if col not in available_columns]
if missing_columns:
    raise ValueError(
        'columns to exclude were not found in the input table: {}'.format(missing_columns)
    )

### Model inputs are the remaining columns, in their original table order.
dm_input = [col for col in available_columns if col not in rejected_vars]

### Names of the scored columns: one P_<target><level> column per target
### level (identical to the original for the two levels defined above) and
### the I_<target> classification column.
dm_predictionvar = ['P_' + dm_dec_target + level for level in dm_classtarget_level]
dm_classtarget_intovar = 'I_' + dm_dec_target

##################
### Data Split ###
##################

### Filter expressions of the form "<partition column>=<value>", one per split.
### Only train_part is used in the visible cells (as the `where` of the
### training table).
train_part = '{}={}'.format(dm_partitionvar, dm_partition_train_val)
test_part = '{}={}'.format(dm_partitionvar, dm_partition_test_val)
valid_part = '{}={}'.format(dm_partitionvar, dm_partition_validate_val)

#####################
### Training Code ###
#####################

### Fit the gradient boosting model on the rows selected by train_part.
### The model table goes to cas_out_tbl and the saved state to astore_tbl
### (read back as `rstore` by the scoring step); both replace existing tables.
dm_model = conn.decisionTree.gbtreeTrain(
    **xgb_params,
    table={'caslib': caslib, 'name': in_mem_tbl, 'where': train_part},
    target=dm_dec_target,
    inputs=dm_input,
    encodeName=True,
    nominal=dm_dec_target,
    casOut={'caslib': caslib, 'name': cas_out_tbl, 'replace': True},
    earlyStop=early_stop_params,
    saveState={'caslib': caslib, 'name': astore_tbl, 'replace': True},
)

### Score every row of the input table (all partitions) with the saved
### analytic store, carrying the target and partition columns into the output.
### Note that casout names no caslib, unlike the other output tables.
conn.astore.score(
    table={'caslib': caslib, 'name': in_mem_tbl},
    copyvars=[dm_dec_target, dm_partitionvar],
    casout={'name': cas_score_tbl, 'replace': True},
    rstore={'caslib': caslib, 'name': astore_tbl},
)

### Handle to the scored table.
score_astore = conn.CASTable(cas_score_tbl)

### create tables with predicted values
def _actual_vs_predicted(scored, partition_value):
    """Return the target and event-probability columns for one partition.

    Parameters
    ----------
    scored : pandas.DataFrame
        Scored data containing the partition column, the target column and
        the prediction column ``dm_predictionvar[1]``.
    partition_value : int
        Value of ``dm_partitionvar`` that selects the partition.

    Returns
    -------
    pandas.DataFrame
        Two columns (target, predicted probability of the second target
        level), both renamed to ``'0'`` exactly as the original code did.
    """
    in_partition = scored[dm_partitionvar] == partition_value
    subset = scored[in_partition][[dm_dec_target, dm_predictionvar[1]]]
    # NOTE(review): every column is renamed to '0', so the frame ends up with
    # duplicate column labels -- presumably required by a downstream consumer;
    # kept unchanged.
    subset = subset.rename(columns=lambda _name: '0')
    # Re-wrap as a plain pandas DataFrame, as the original did (the frame
    # returned by to_frame may be a DataFrame subclass -- TODO confirm).
    return pd.DataFrame(subset)


### score_astore is already a CASTable handle, so it is downloaded directly
### instead of being wrapped in conn.CASTable(...) a second time (that call
### expects a table name, not a table object).
dm_scoreddf = score_astore.to_frame()
dm_scoreddf[dm_dec_target] = dm_scoreddf[dm_dec_target].astype(int)

### one frame of actual vs. predicted values per partition
trainData = _actual_vs_predicted(dm_scoreddf, dm_partition_train_val)
testData = _actual_vs_predicted(dm_scoreddf, dm_partition_test_val)
validData = _actual_vs_predicted(dm_scoreddf, dm_partition_validate_val)

### print model & results
print(dm_model)

### The table info is printed explicitly: a bare expression in the middle of
### a notebook cell is evaluated but never rendered.
print(conn.table.tableInfo(caslib=caslib, wildIgnore=False, name=astore_tbl))

### Describe the analytic store once and print each section of the single
### result, instead of re-running the same action for every section.
astore_description = conn.astore.describe(
    rstore=dict(name=astore_tbl, caslib=caslib),
    epcode=True,
)
print(astore_description.Description)
print(astore_description.InputVariables)
print(astore_description.OutputVariables)
print(astore_description.epcode)

### Handle to the analytic store table.
model_astore = conn.CASTable(astore_tbl, caslib=caslib)

In [8]:
##########################
### Assess Bias Action ###
##########################

### Assess bias of the saved analytic store on the full input table with
### respect to bias_var. The call is left as the final bare expression of the
### cell so that the notebook renders the returned result tables.
conn.fairAITools.assessBias(
    table={'caslib': caslib, 'name': in_mem_tbl},
    modelTable={'caslib': caslib, 'name': astore_tbl},
    modelTableType="ASTORE",
    response=dm_dec_target,
    predictedVariables=dm_predictionvar,
    responseLevels=dm_classtarget_level,
    sensitiveVariable=bias_var,
)

NOTE: Active Session now server.


Unnamed: 0,Metric,MetricLabel,Value,Base,Compare,Note
0,DemographicParity,Demographic Parity (Statistical Parity),0.06171,0,1,
1,PredictiveParity,Predictive Parity,0.061391,0,1,
2,EqualAccuracy,Equal Accuracy,0.008491,0,1,
3,EqualizedOdds,Equalized Odds,0.044135,0,1,Max FPR difference is greater than max TPR dif...
4,EqualOpportunity,Equal Opportunity,0.003568,0,1,

Unnamed: 0,Group,N,TP,FP,TN,FN,TPR,FPR,TNR,FNR,cutoffKS,GAIN,LIFT,RESP,CUMRESP,CUMLIFT,INTO_EVENT,PREDICTED_EVENT,P_ml_indicator0,P_ml_indicator1
0,0,7133.0,6996.0,18.0,111.0,8.0,0.998858,0.139535,0.860465,0.001142,0.859323,0.019417,1.019417,5.097087,10.194175,1.019417,0.983317,0.980659,0.980659,0.019341
1,1,7169.0,6551.0,56.0,531.0,31.0,0.99529,0.0954,0.9046,0.00471,0.89989,0.090854,1.090854,5.454269,10.908538,1.090854,0.921607,0.919268,0.919268,0.080732

Unnamed: 0,Metric,MetricLabel,Value,Base,Compare
0,P_ml_indicator0,Average Predicted: ml_indicator=0,0.061391,0,1
1,P_ml_indicator1,Average Predicted: ml_indicator=1,0.061391,1,0
2,TPR,True Positive Rate,0.003568,0,1
3,FPR,False Positive Rate,0.044135,0,1
4,TNR,True Negative Rate,0.044135,1,0
5,FNR,False Negative Rate,0.003568,1,0
6,FDR,False Discovery Rate,0.00591,1,0
7,ACC,Accuracy,0.008491,0,1
8,C,Area Under ROC,0.00692,1,0
9,F1,F1 Score,0.004742,0,1
