In [93]:
#####################################################
###       Train & Register SAS gbTree Model        ###
#####################################################

###################
### Credentials ###
###################

import sys

filepath = input("file path to credentials: ")
sys.path.append(filepath)
from credentials import hostname, session, protocol, token, token_pem

In [94]:
#############################
### Connect with SAS Viya ###
#############################

import swat

access_token = open(token, "r").read()
conn =  swat.CAS(hostname=hostname, username=None, password=access_token, ssl_ca_list=token_pem, protocol=protocol)
print(conn.serverstatus())

NOTE: Grid node action status report: 3 nodes, 9 total actions executed.
[About]

 {'CAS': 'Cloud Analytic Services',
  'CASCacheLocation': 'CAS Disk Cache',
  'CASHostAccountRequired': 'OPTIONAL',
  'Copyright': 'Copyright Â© 2014-2025 SAS Institute Inc. All Rights Reserved.',
  'GlobalReadOnlyMode': 'NO',
  'ServerTime': '2025-11-07T17:13:27Z',
  'System': {'Hostname': 'controller.sas-cas-server-default.viya.svc.cluster.local',
   'Linux Distribution': 'Red Hat Enterprise Linux release 8.10 (Ootpa)',
   'Model Number': 'x86_64',
   'OS Family': 'LIN X64',
   'OS Name': 'Linux',
   'OS Release': '5.15.0-1091-azure',
   'OS Version': '#100-Ubuntu SMP Tue May 27 21:41:06 UTC 2025'},
  'Transferred': 'NO',
  'Version': '4.00',
  'VersionLong': 'V.04.00M0P07072025',
  'Viya Release': '20250816.1755312373510',
  'Viya Version': 'Stable 2025.07',
  'license': {'expires': '06Mar2026:00:00:00',
   'gracePeriod': 0,
   'site': 'ENGAGE PLATFORM FINANCIAL CRIMES ANALYTICS PREMIER',
   'siteNum':

In [95]:
#############################
### Identify Table in CAS ###
#############################

### caslib and table to use in modeling
caslib = 'public'
in_mem_tbl = 'ANNUITY_ADVISORS_PREP'

### load table in-memory if not already exists in-memory
if conn.table.tableExists(caslib=caslib, name=in_mem_tbl).exists<=0:
    conn.table.loadTable(caslib=caslib, path=str(in_mem_tbl+str('.sashdat')), 
                         casout={'name':in_mem_tbl, 'caslib':caslib, 'promote':True})

### show table to verify
conn.table.tableInfo(caslib=caslib, wildIgnore=False, name=in_mem_tbl)

NOTE: Cloud Analytic Services made the file ANNUITY_ADVISORS_PREP.sashdat available as table ANNUITY_ADVISORS_PREP in caslib public.


Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime,TableRedistUpPolicy
0,ANNUITY_ADVISORS_PREP,15351,33,0,utf-8,2025-11-07T17:13:49+00:00,2025-11-07T17:13:49+00:00,2025-11-07T17:13:49+00:00,UTF8,2078155000.0,0,0,ANNUITY_ADVISORS_PREP.sashdat,Public,0,chris.parrish@sas.com,,2025-11-05T16:20:31+00:00,2077979000.0,Not Specified


In [96]:
###############################
### Create CAS Table Object ###
###############################

dm_inputdf =  conn.CASTable(in_mem_tbl, caslib=caslib)

### print columns for review of model parameters
conn.table.columnInfo(table=dict(caslib=caslib, name=in_mem_tbl))

Unnamed: 0,Column,Label,ID,Type,RawLength,FormattedLength,Format,NFL,NFD
0,advisor,,1,double,8,12,,0,0
1,advisor_event_indicator,,2,double,8,12,,0,0
2,sf_face_2_face,,3,double,8,12,,0,0
3,sf_call_outbound,,4,double,8,12,,0,0
4,sf_call_inbound,,5,double,8,12,,0,0
5,sf_email_inbound,,6,double,8,12,,0,0
6,channel_bank,,7,double,8,12,,0,0
7,channel_wirehouse,,8,double,8,12,,0,0
8,channel_ria,,9,double,8,12,,0,0
9,primary_prod_sold_fixed,,10,double,8,12,,0,0


In [97]:
########################
### Model Parameters ###
########################

### model arugments
m=20
seed=12345
nTree=100
learningRate=0.1
subSampleRate=0.5
lasso=0
ridge=1
distribution="binary"
maxBranch=2
maxLevel=5
leafSize=5
missing="useinsearch"
minUseInSearch=1
nBins=50
quantileBin=True

early_stop_params = dict(
    metric="MCR",
    stagnation=5,
    tolerance=0,
    minimum=False,
    threshold=0,
    thresholdIter=0
    )

### model manager information
model_name = 'agent_advisor_gbtree_action_python'
project_name = 'Agent Advisor Propensity to Sell'
description = 'gbtree_action_python'
model_type = 'gradient_boosting'

### define macro variables for model
dm_dec_target = 'advisor_event_indicator'
dm_partitionvar = 'analytic_partition' 
dm_key = 'advisor' 
dm_classtarget_level = ['0', '1']
dm_partition_validate_val, dm_partition_train_val, dm_partition_test_val = [0, 1, 2]

In [None]:
### get column information and list of column names from a CAS table
conn.loadactionset("dataSciencePilot")

conn.dataSciencePilot.exploreData(
     table = dict(caslib=caslib, name=in_mem_tbl),
     casOut = dict(name="table_info_out", replace=True),
     explorationPolicy = dict()
     )
df = conn.fetch(table = dict(name="table_info_out"), to=100)['Fetch'] # 'to=' should capture the # of columns in the table
dm_input = df['Variable'].tolist()

NOTE: Added action set 'dataSciencePilot'.


In [48]:
### create list of regressors
keep_predictors = [
    ]
rejected_predictors = [
    'channel_ria',
    'region_we',
    'primary_prod_sold_fixed',
    'wholesaler'
    ] 

### var to consider in bias assessment
bias_vars = ['sf_face_2_face']

### var to consider in partial dependency
pd_var1 = ''
pd_var2 = ''

In [57]:
##############################
### Final Modeling Columns ###
##############################

### create list of model variables
dm_input = list(dm_inputdf.columns.values)
macro_vars = (dm_dec_target + ' ' + dm_partitionvar + ' ' + dm_key).split()
#rejected_predictors = [i for i in dm_input if i not in keep_predictors]
rejected_vars = rejected_predictors + macro_vars #(include macro_vars if rejected_predictors are explicitly listed - not contra keep_predictors)
for i in rejected_vars:
    dm_input.remove(i)
print(dm_input)

### create prediction variables
dm_predictionvar = [str('P_') + dm_dec_target + dm_classtarget_level[0], str('P_') + dm_dec_target + dm_classtarget_level[1]]
dm_classtarget_intovar = str('I_') + dm_dec_target

### create partition objects
train_part = str(dm_partitionvar)+str('=')+str(dm_partition_train_val)
test_part = str(dm_partitionvar)+str('=')+str(dm_partition_test_val)
valid_part = str(dm_partitionvar)+str('=')+str(dm_partition_validate_val)

['sf_face_2_face', 'sf_call_outbound', 'sf_call_inbound', 'sf_email_inbound', 'channel_bank', 'channel_wirehouse', 'primary_prod_sold_va', 'sf_email_campaigns', 'advisor_hh_children', 'annuity_mkt_opp', 'advisor_advising_years', 'advisor_aum', 'advisor_annuity_selling_years', 'advisor_age', 'advisor_net_worth', 'advisor_credit_hist_mos', 'advisor_firm_changes', 'advisor_credit_score', 'region_ca', 'region_ny', 'region_fl', 'region_tx', 'region_ne', 'region_so', 'region_mw', 'sf_email_responses']


In [58]:
### use dataSciencePilot to select features
conn.loadactionset("dataSciencePilot")

conn.dataSciencePilot.selectFeatures(
     table = dict(caslib='public', name='annuity_advisors_prep'),
     target = dm_dec_target,
     selectionPolicy = dict(criterion="SU"),
     inputs = dm_input,
     casOut = dict(name='agent_advisor_features', replace=True)
 )

NOTE: Added action set 'dataSciencePilot'.


Unnamed: 0,casLib,Name,Rows,Columns,casTable
0,CASUSER(chris.parrish@sas.com),agent_advisor_features,10,4,"CASTable('agent_advisor_features', caslib='CAS..."


In [59]:
results = conn.fetch(table = dict(name='agent_advisor_features'))
results

Unnamed: 0,Variable,Target,Rank,CritValue
0,sf_call_inbound,advisor_event_indicator,1.0,0.233597
1,advisor_hh_children,advisor_event_indicator,2.0,0.140637
2,advisor_net_worth,advisor_event_indicator,3.0,0.116597
3,sf_email_campaigns,advisor_event_indicator,4.0,0.105315
4,advisor_advising_years,advisor_event_indicator,5.0,0.072528
5,advisor_credit_score,advisor_event_indicator,6.0,0.071365
6,sf_face_2_face,advisor_event_indicator,7.0,0.067302
7,sf_email_responses,advisor_event_indicator,8.0,0.061159
8,advisor_firm_changes,advisor_event_indicator,9.0,0.046331
9,advisor_credit_hist_mos,advisor_event_indicator,10.0,0.046127


In [60]:
col_df = conn.fetch(table = dict(name="agent_advisor_features"), to=100)['Fetch']
dm_input = col_df['Variable'].tolist()

In [None]:
#####################
### Training Code ###
#####################

### import packages
conn.loadactionset('decisionTree')
conn.loadactionset('astore')
conn.loadactionset('explainModel')
conn.loadactionset('fairAITools')
conn.loadactionset('percentile')

### create names of tables for action set
astore_tbl = str(in_mem_tbl+str('_astore'))
cas_score_tbl = str(in_mem_tbl+str('_score'))
cas_out_tbl = str(in_mem_tbl+str('_model'))

### estimate & fit model
dm_model = conn.decisionTree.gbtreeTrain(
    earlyStop=early_stop_params,
    table=dict(caslib=caslib, name=in_mem_tbl, where=train_part),
    target=dm_dec_target,
    nominal=dm_dec_target,
    inputs=dm_input,
    encodeName=True,
    casOut=dict(caslib=caslib, name=cas_out_tbl, replace=True),
    saveState=dict(caslib=caslib, name=astore_tbl, replace=True),
    m=m, seed=seed, nTree=nTree, learningRate=learningRate, subSampleRate=subSampleRate, 
    lasso=lasso, ridge=ridge, distribution=distribution, maxBranch=maxBranch, 
    maxLevel=maxLevel, leafSize=leafSize, missing=missing, minUseInSearch=minUseInSearch, 
    nBins=nBins, quantileBin=quantileBin 
    )

NOTE: Added action set 'decisionTree'.
NOTE: Added action set 'astore'.
NOTE: Added action set 'explainModel'.
NOTE: Added action set 'fairAITools'.
NOTE: Added action set 'percentile'.
NOTE: Added action set 'modelPublishing'.
NOTE: 116219 bytes were written to the table "ANNUITY_ADVISORS_PREP_astore" in the caslib "public".


In [79]:
##################
### Score Code ###
##################

### score full data
conn.decisionTree.dtreeScore(
    modelTable=dict(caslib=caslib, name=cas_out_tbl),
    table=dict(caslib=caslib, name=in_mem_tbl), 
    copyvars=[dm_dec_target, dm_partitionvar],
    casout=dict(caslib=caslib, name=cas_score_tbl, replace=True),
    encodeName=True,
    assessOneRow=True
    )

### create score code
conn.decisionTree.gbtreeCode(
  modelTable=dict(caslib=caslib, name=cas_out_tbl),
  code=dict(casOut=dict(caslib=caslib, name='gbtree_scorecode', replace=True, promote=False))
  )

Unnamed: 0,casLib,Name
0,Public,gbtree_scorecode


In [88]:
####################
### Assess Model ###
####################

conn.percentile.assess(
  table=dict(caslib=caslib, name=cas_score_tbl),
  event="1",
  response=dm_dec_target,
  inputs=dm_predictionvar[1],
  cutStep=0.0001,
  casOut=dict(caslib=caslib, name='gbtree_action_python_assess', replace=True)
  )

Unnamed: 0,casLib,Name,Rows,Columns,casTable
0,Public,gbtree_action_python_assess,20,21,"CASTable('gbtree_action_python_assess', caslib..."
1,Public,gbtree_action_python_assess_ROC,10000,22,"CASTable('gbtree_action_python_assess_ROC', ca..."


In [None]:
assessment = conn.fetch(table = dict(caslib=caslib, name="gbtree_action_python_assess_roc", where='_KS_=1'))['Fetch']
#df = assessment['column'].tolist()
assessment

Unnamed: 0,_Column_,_Event_,_Cutoff_,_TP_,_FP_,_FN_,_TN_,_Sensitivity_,_Specificity_,_KS_,_FPR_,_ACC_,_FDR_,_F1_,_C_,_GINI_,_GAMMA_,_TAU_,_MiscEvent_,_FNR_
0,P_advisor_event_indicator1,1,0.3733,4518.0,217.0,88.0,10528.0,0.980894,0.979805,1.0,0.020195,0.980132,0.045829,0.967348,0.997934,0.995868,0.995873,0.418328,0.019868,0.019106


In [64]:
###################
### Assess Bias ###
###################

conn.fairAITools.assessBias(
		table = dict(caslib=caslib, name=in_mem_tbl),
		modelTable = dict(caslib=caslib, name=astore_tbl),
		modelTableType = "ASTORE",
		response = dm_dec_target,
		predictedVariables = dm_predictionvar,
		responseLevels = dm_classtarget_level,
		sensitiveVariable = bias_vars[0]
        )

NOTE: The option event is not specified. Response level '0' is used as an event.


Unnamed: 0,Metric,MetricLabel,Value,Base,Compare,Note
0,DemographicParity,Demographic Parity (Statistical Parity),0.272161,1.0,0.0,
1,PredictiveParity,Predictive Parity,0.266379,1.0,0.0,
2,EqualAccuracy,Equal Accuracy,0.005617,1.0,0.0,
3,EqualizedOdds,Equalized Odds,0.042616,1.0,0.0,The maximum FPR difference is greater than the...
4,EqualOpportunity,Equal Opportunity,0.01027,1.0,0.0,

Unnamed: 0,Group,N,TP,FP,TN,FN,TPR,FPR,TNR,FNR,cutoffKS,GAIN,LIFT,RESP,CUMRESP,CUMLIFT,INTO_EVENT,PREDICTED_EVENT,P_advisor_event_indicator0,P_advisor_event_indicator1
0,0.0,10661.0,6461.0,96.0,3981.0,123.0,0.981318,0.023547,0.976453,0.018682,0.957772,0.622114,1.622114,8.110571,16.221142,1.622114,0.615045,0.61805,0.61805,0.38195
1,1.0,4690.0,4126.0,35.0,494.0,35.0,0.991589,0.066163,0.933837,0.008411,0.925426,0.129536,1.129536,5.647681,11.295362,1.129536,0.887207,0.884429,0.884429,0.115571

Unnamed: 0,Metric,MetricLabel,Value,Base,Compare
0,P_advisor_event_indicator0,Average Predicted: ADVISOR_EVENT_INDICATOR=0,0.266379,1.0,0.0
1,P_advisor_event_indicator1,Average Predicted: ADVISOR_EVENT_INDICATOR=1,0.266379,0.0,1.0
2,TPR,True Positive Rate,0.01027,1.0,0.0
3,FPR,False Positive Rate,0.042616,1.0,0.0
4,TNR,True Negative Rate,0.042616,0.0,1.0
5,FNR,False Negative Rate,0.01027,0.0,1.0
6,FDR,False Discovery Rate,0.006229,0.0,1.0
7,ACC,Accuracy,0.005617,1.0,0.0
8,C,Area under ROC,0.001515,0.0,1.0
9,F1,F1 Score,0.008254,1.0,0.0


In [65]:
import pandas as pd

### score full data
conn.astore.score(
    table={"name":in_mem_tbl, "caslib":caslib}, 
    copyvars=[dm_dec_target, dm_partitionvar],
    casout={"name":cas_score_tbl, "replace":True},
    rstore={"name":astore_tbl, "caslib":caslib})
score_astore = conn.CASTable(cas_score_tbl)

### create tables with predicted values
dm_scoreddf = conn.CASTable(score_astore).to_frame()
dm_scoreddf[dm_dec_target] = dm_scoreddf[dm_dec_target].astype(int)
trainData = dm_scoreddf[dm_scoreddf[dm_partitionvar]==dm_partition_train_val][[dm_dec_target, dm_predictionvar[1]]].rename(columns=lambda x:'0')
testData = dm_scoreddf[dm_scoreddf[dm_partitionvar]==dm_partition_test_val][[dm_dec_target, dm_predictionvar[1]]].rename(columns=lambda x:'0')
validData = dm_scoreddf[dm_scoreddf[dm_partitionvar]==dm_partition_validate_val][[dm_dec_target, dm_predictionvar[1]]].rename(columns=lambda x:'0')
trainData = pd.DataFrame(trainData)
testData = pd.DataFrame(testData)
validData = pd.DataFrame(validData)

### print model & results
print(dm_model)
conn.table.tableInfo(caslib=caslib, wildIgnore=False, name=astore_tbl)
print(conn.astore.describe(rstore={"name":astore_tbl, "caslib":caslib}, epcode=True).Description)
print(conn.astore.describe(rstore={"name":astore_tbl, "caslib":caslib}, epcode=True).InputVariables)
print(conn.astore.describe(rstore={"name":astore_tbl, "caslib":caslib}, epcode=True).OutputVariables)
print(conn.astore.describe(rstore={"name":astore_tbl, "caslib":caslib}, epcode=True).epcode)
model_astore = conn.CASTable(astore_tbl, caslib=caslib)

[EncodedName]

         LEVNAME  LEVINDEX                     VARNAME
 0             1         0  P_advisor_event_indicator1
 1             0         1  P_advisor_event_indicator0

[EncodedTargetName]

   LEVNAME  LEVINDEX                    VARNAME
 0                 0  I_advisor_event_indicator

[ModelInfo]

 Gradient Boosting Tree for ANNUITY_ADVISORS_PREP
 
                                Descr     Value
 0                    Number of Trees    100.00
 1                       Distribution      2.00
 2                      Learning Rate      0.10
 3                   Subsampling Rate      0.50
 4   Number of Selected Variables (M)     10.00
 5                     Number of Bins     50.00
 6                Number of Variables     10.00
 7           Max Number of Tree Nodes     31.00
 8           Min Number of Tree Nodes     11.00
 9             Max Number of Branches      2.00
 10            Min Number of Branches      2.00
 11              Max Number of Levels      5.00
 12         

In [70]:
from sasctl import Session
sess = Session(hostname=session, token=access_token, client_secret='access_token')

In [71]:
#######################################
### Register Model in Model Manager ###
#######################################

from sasctl import register_model, publish_model

### create session in cas
access_token = open(token, "r").read()

with sess:
    model = register_model(model=model_astore, name=model_name, project=project_name, version='latest') #force=True

NOTE: Added action set 'astore'.
NOTE: 119203 bytes were downloaded from the table "ANNUITY_ADVISORS_PREP_ASTORE" in the caslib "Public".
