In [None]:
##################################################
###     Train & Register SAS Logit Model       ###
##################################################

###################
### Credentials ###
###################

import os
import sys
from pathlib import Path

### ask where credentials.py lives and make that directory importable
filepath = input("file path to credentials: ")
sys.path.append(filepath)

### pull the connection settings and token locations out of credentials.py
from credentials import (
    hostname,
    session,
    port,
    protocol,
    wd,
    output_dir,
    git_dir,
    token_dir,
    token,
    token_refresh,
    token_pem,
    username,
)

In [2]:
#############################
### Connect with SAS Viya ###
#############################

import swat

### read the access token; the context manager closes the file handle
### as soon as the contents are in memory instead of leaking it
with open(token, "r") as token_file:
    access_token = token_file.read()

### the token is passed in the password slot and no username is supplied
conn = swat.CAS(hostname=hostname, username=None, password=access_token, ssl_ca_list=token_pem, protocol=protocol)
print(conn.serverstatus())

NOTE: Grid node action status report: 1 nodes, 9 total actions executed.
[About]

 {'CAS': 'Cloud Analytic Services',
  'CASCacheLocation': 'CAS Disk Cache',
  'CASHostAccountRequired': 'OPTIONAL',
  'Copyright': 'Copyright © 2014-2025 SAS Institute Inc. All Rights Reserved.',
  'GlobalReadOnlyMode': 'NO',
  'ServerTime': '2025-07-17T18:47:20Z',
  'System': {'Hostname': 'controller.sas-cas-server-default.innovationlab.svc.cluster.local',
   'Linux Distribution': 'Red Hat Enterprise Linux release 8.10 (Ootpa)',
   'Model Number': 'x86_64',
   'OS Family': 'LIN X64',
   'OS Name': 'Linux',
   'OS Release': '5.15.0-1090-azure',
   'OS Version': '#99-Ubuntu SMP Thu May 22 21:15:50 UTC 2025'},
  'Transferred': 'NO',
  'Version': '4.00',
  'VersionLong': 'V.04.00M0P06092025',
  'Viya Release': '20250702.1751427964045',
  'Viya Version': 'Stable 2025.06',
  'license': {'expires': '08Jul2027:00:00:00',
   'gracePeriod': 0,
   'site': 'CIS CREATE INNOVATION LAB (ENTERPRISE-RISK-MRM-SPI',
   'si

In [3]:
#############################
### Identify Table in CAS ###
#############################

### source caslib and table for modeling
caslib = 'public'
in_mem_tbl = 'AML_BANK_PREP'

### promote the .sashdat file into memory only when the table is not loaded yet
table_status = conn.table.tableExists(caslib=caslib, name=in_mem_tbl)
if table_status.exists <= 0:
    conn.table.loadTable(
        caslib=caslib,
        path=in_mem_tbl + '.sashdat',
        casout=dict(name=in_mem_tbl, caslib=caslib, promote=True),
    )

### display the table metadata as a sanity check
conn.table.tableInfo(caslib=caslib, wildIgnore=False, name=in_mem_tbl)

Unnamed: 0,Name,Rows,Columns,IndexedColumns,Encoding,CreateTimeFormatted,ModTimeFormatted,AccessTimeFormatted,JavaCharSet,CreateTime,View,MultiPart,SourceName,SourceCaslib,Compressed,Creator,Modifier,SourceModTimeFormatted,SourceModTime,TableRedistUpPolicy
0,AML_BANK_PREP,14302,27,0,utf-8,2025-07-15T16:31:21+00:00,2025-07-15T16:31:21+00:00,2025-07-17T07:48:19+00:00,UTF8,2068216000.0,0,0,AML_BANK_PREP.sashdat,Public,0,Chris.Parrish@sas.com,,2025-03-31T16:07:29+00:00,2059056000.0,Not Specified


In [4]:
########################
### Create Dataframe ###
########################

### client-side handle to the in-memory CAS table
dm_inputdf = conn.CASTable(in_mem_tbl, caslib=caslib)

### list the table's columns to help choose the model parameters
conn.table.columnInfo(table=dict(caslib=caslib, name=in_mem_tbl))

Unnamed: 0,Column,Label,ID,Type,RawLength,FormattedLength,Format,NFL,NFD
0,account_id,unique identifier,1,double,8,12,,0,0
1,num_transactions,number of transactions financial services cust...,2,double,8,12,,0,0
2,credit_score,customer credit score,3,double,8,12,,0,0
3,marital_status_single,,4,double,8,12,,0,0
4,marital_status_married,,5,double,8,12,,0,0
5,marital_status_divorced,,6,double,8,12,,0,0
6,analytic_partition,,7,double,8,12,,0,0
7,ml_indicator,"indicator for money laundering event no (0), y...",8,double,8,12,,0,0
8,checking_only_indicator,indicator for whether customer only has a chec...,9,double,8,12,,0,0
9,prior_ctr_indicator,indicator for whether the customer has a curre...,10,double,8,12,,0,0


In [5]:
########################
### Model Parameters ###
########################

# import python libraries
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.utils import shuffle
### import actionsets
conn.loadactionset('regression')
conn.loadactionset('astore')

### model arguments
event = 'LAST'
selection_method = 'STEPWISE'

### model manager information
metadata_output_dir = 'outputs'
model_name = 'logit_SAS'
project_name = 'Risk Score'
description = 'Logit SAS'
model_type = 'Logistic Regression'

### define macro variables for model
dm_dec_target = 'ml_indicator'
dm_partitionvar = 'analytic_partition'
create_new_partition = 'no' # 'yes', 'no'
dm_key = 'account_id' 
dm_classtarget_level = ['0', '1']
dm_partition_validate_val, dm_partition_train_val, dm_partition_test_val = [0, 1, 2]
dm_partition_validate_perc, dm_partition_train_perc, dm_partition_test_perc = [0.3, 0.6, 0.1]

### seed for the optional shuffle below, so a re-run reproduces the same partition
dm_partition_seed = 42

### create list of rejected predictor columns
rejected_predictors = [
    'atm_deposit_indicator', 
    'citizenship_country_risk', 
    'distance_to_bank',
    'distance_to_employer', 
    'income', 
    'num_acctbal_chgs_gt2000',
    'occupation_risk'
    ]

### create partition column, if not already in dataset
if create_new_partition == 'yes':
    # NOTE(review): dm_inputdf is a CASTable handle when this cell runs, while the
    # calls below (sklearn shuffle, reset_index, .loc assignment) look written for a
    # local pandas DataFrame -- confirm this branch works before enabling it.
    # NOTE(review): the training cell fits on the CAS table by name, so a partition
    # column created here presumably has to be uploaded to CAS first -- confirm.
    dm_inputdf = shuffle(dm_inputdf, random_state=dm_partition_seed)
    dm_inputdf.reset_index(inplace=True, drop=True)
    validate_rows = round(len(dm_inputdf)*dm_partition_validate_perc)
    train_rows = round(len(dm_inputdf)*dm_partition_train_perc) + validate_rows
    test_rows = len(dm_inputdf)-train_rows
    # the slices below share their boundary label; assuming pandas .loc semantics
    # (both ends included), each later assignment overwrites the boundary row
    # written by the previous one
    dm_inputdf.loc[0:validate_rows,dm_partitionvar] = dm_partition_validate_val
    dm_inputdf.loc[validate_rows:train_rows,dm_partitionvar] = dm_partition_train_val
    dm_inputdf.loc[train_rows:,dm_partitionvar] = dm_partition_test_val

### var to consider in bias assessment
bias_var = 'cross_border_trx_indicator'

NOTE: Added action set 'regression'.
NOTE: Added action set 'astore'.


In [6]:
##############################
### Final Modeling Columns ###
##############################

### columns that must never be used as predictors: target, partition flag and key.
### built as an explicit list so a column name containing whitespace stays intact
macro_vars = [dm_dec_target, dm_partitionvar, dm_key]
rejected_vars = rejected_predictors + macro_vars

### report excluded names that are not in the table instead of failing on them,
### so a stale entry in rejected_predictors is visible but does not stop the run
table_columns = list(dm_inputdf.columns.values)
missing_vars = [col for col in rejected_vars if col not in table_columns]
if missing_vars:
    print('WARNING: excluded columns not found in table: ' + ', '.join(missing_vars))

### create list of model variables (table order preserved)
excluded_vars = set(rejected_vars)
dm_input = [col for col in table_columns if col not in excluded_vars]

### create prediction variables: P_<target><level> for the two class levels
### and I_<target> for the predicted class
dm_predictionvar = ['P_' + dm_dec_target + dm_classtarget_level[0], 'P_' + dm_dec_target + dm_classtarget_level[1]]
dm_classtarget_intovar = 'I_' + dm_dec_target

In [7]:
#####################
### Training Code ###
#####################

### create names of tables for action set
astore_tbl = in_mem_tbl + '_astore'
cas_score_tbl = in_mem_tbl + '_score'
cas_out_tbl = in_mem_tbl + '_model'

### estimate & fit model; the fitted model is also saved as an analytic store
dm_model = conn.regression.logistic(
    model={"depVars":[{"name":dm_dec_target, "options":{"event":event}}], "effects":[{"vars":dm_input}],"informative":True },
    partByVar={"name":dm_partitionvar, "train":str(dm_partition_train_val), "valid":str(dm_partition_validate_val),"test":str(dm_partition_test_val)},
    output={"casOut": {"name":cas_out_tbl, "caslib":caslib, "replace":True}, "copyVars":[dm_dec_target], "into":dm_classtarget_intovar},
    selection={"method":selection_method},
    table={"name":in_mem_tbl, "caslib":caslib},
    store={"name":astore_tbl, "caslib":caslib, "replace":True})

### score full data with the stored model
conn.astore.score(
    table={"name":in_mem_tbl, "caslib":caslib}, 
    copyvars=[dm_dec_target, dm_partitionvar],
    casout={"name":cas_score_tbl, "replace":True},
    rstore={"name":astore_tbl, "caslib":caslib})
score_astore = conn.CASTable(cas_score_tbl)

### pull the scored rows to the client; score_astore is already a CASTable handle,
### so it is used directly rather than being wrapped in a second CASTable
dm_scoreddf = score_astore.to_frame()
dm_scoreddf[dm_dec_target] = dm_scoreddf[dm_dec_target].astype(int)


def _partition_scores(partition_val):
    """Return the actual target and event-probability columns for one partition.

    Rows of dm_scoreddf whose partition column equals `partition_val` are kept.
    Both selected columns are renamed to '0', exactly as the original cell did.
    NOTE(review): presumably the pzmm helpers only rely on column position --
    confirm before changing the rename.
    """
    in_partition = dm_scoreddf[dm_partitionvar] == partition_val
    selected = dm_scoreddf[in_partition][[dm_dec_target, dm_predictionvar[1]]]
    return selected.rename(columns=lambda x: '0')


### create tables with predicted values
trainData = _partition_scores(dm_partition_train_val)
testData = _partition_scores(dm_partition_test_val)
validData = _partition_scores(dm_partition_validate_val)

### print model & results
print(dm_model)
conn.table.tableInfo(caslib=caslib, wildIgnore=False, name=astore_tbl)
### describe the analytic store once and print each section from that single result
astore_info = conn.astore.describe(rstore={"name":astore_tbl, "caslib":caslib}, epcode=True)
print(astore_info.Description)
print(astore_info.InputVariables)
print(astore_info.OutputVariables)
print(astore_info.epcode)
model_astore = conn.CASTable(astore_tbl, caslib=caslib)

NOTE: Convergence criterion (ABSGCONV=1E-7) satisfied.
NOTE: 751715 bytes were written to the table "AML_BANK_PREP_astore" in the caslib "public".
[ModelInfo]

 Model Information
 
          RowId             Description                        Value
 0         DATA             Data Source                AML_BANK_PREP
 1  RESPONSEVAR       Response Variable                 ml_indicator
 2         DIST            Distribution                       Binary
 3         LINK           Link Function                        Logit
 4         TECH  Optimization Technique  Newton-Raphson with Ridging

[NObs]

 Number of Observations
 
    RowId                  Description    Value  Training  Validation  Testing
 0  NREAD  Number of Observations Read  14302.0    8581.0      4291.0   1430.0
 1  NUSED  Number of Observations Used  14302.0    8581.0      4291.0   1430.0

[OutputCasTables]

    casLib                  Name Label   Rows  Columns                                           casTable
 0  Pub

In [None]:
#######################################
### Register Model in Model Manager ###
## Ensure Model Does Not Exist in MM ##
##### Using PZMM Zips Up Metadata #####
#######################################

import shutil
from sasctl import pzmm as pzmm
from sasctl import Session
from sasctl import register_model, publish_model
from sasctl._services.model_repository import ModelRepository as mr

### create session in cas
### `password` is not among the names imported from credentials at the top of the
### notebook, so it is looked up here; when credentials does not provide one, the
### access token that already authenticates the CAS connection is used instead
try:
    from credentials import password
except ImportError:
    password = None
if password is not None:
    sess = Session(hostname, username=username, password=password, verify_ssl=False, protocol="http")
else:
    sess = Session(hostname, token=access_token, verify_ssl=False, protocol="http")

### create directories for metadata (start from an empty folder on every run)
output_path = Path(output_dir) / metadata_output_dir / model_name
if output_path.exists() and output_path.is_dir():
    shutil.rmtree(output_path)
os.makedirs(output_path)

### create metadata and import to model manager
pzmm.JSONFiles().calculateFitStat(trainData=trainData, testData=testData, validateData=validData, jPath=output_path)
pzmm.JSONFiles().generateROCLiftStat(dm_dec_target, int(dm_classtarget_level[1]), conn, trainData=trainData, testData=testData, validateData=validData, jPath=output_path)
file_list = os.listdir(output_path)

### the file handles must stay open while register_model uploads them; ExitStack
### closes every one of them afterwards, even if registration raises
from contextlib import ExitStack
with ExitStack() as open_files:
    files = [
        {'name': file_name, 'file': open_files.enter_context(open(output_path / file_name))}
        for file_name in file_list
    ]
    with sess:
        reg_model = register_model(model_astore, model_name, project_name, files=files, force=True, version='latest')
#       pub_model = publish_model(model_name, 'maslocal')
#       score_example = pub_model.score(input1=1, input2=2, etc.)