# Predictive Modelling Leverage SAS Viya's Python API
### Demonstrating How Viya Embraces All Coding Languages

## Step 1: Import All Our Packages

In [37]:
import swat
from swat.render import render_html
from pprint import pprint
from matplotlib import pyplot as plt
import pandas as pd
import sys
import numpy as np
import sasctl
from sasctl import Session, register_model
from sasctl.services import model_management as mm
from sasctl.services import model_repository as mr
# Load libraries
import pandas as pd
import os, shutil # used to create necessary folders
import warnings
import json
warnings.filterwarnings('ignore')

# Define a directory where all the files for model manager will be kept 
MODEL_DIR = 'C:/Users/canast/Desktop/Data/'

##  Step 2: connect to Viya

In [6]:
sess = swat.CAS('https://yoururlhere.com/', 5570, 'username', 'password', protocol='http')

# Load the needed action sets for this example:
sess.loadactionset(actionset="dataStep")
sess.loadactionset(actionset="dataPreprocess")
sess.loadactionset(actionset="cardinality")
sess.loadactionset(actionset="sampling")
sess.loadactionset(actionset="decisionTree")
sess.loadactionset(actionset="astore")
sess.loadactionset(actionset="percentile")

NOTE: Added action set 'dataStep'.
NOTE: Added action set 'dataPreprocess'.
NOTE: Added action set 'cardinality'.
NOTE: Added action set 'sampling'.
NOTE: Added action set 'decisionTree'.
NOTE: Added action set 'astore'.
NOTE: Added action set 'percentile'.


## Step 3: point to our data

In [7]:
data = sess.CASTable('360DiscoverData', caslib='Public')

In [8]:
data.head()

Unnamed: 0,Session_ID,Customer_ID,Visitor_ID,Country,State_Region,State_Region_Latitude,State_Region_Longitude,City,Active_10_to_30m,Active_10_to_30s,...,Inferred_HH_Age_Range,Inferred_HH_Education,Inferred_HH_Employment,Inferred_HH_IPA,Inferred_Approx_Age,Inferred_Education,Inferred_Employment,Inferred_IPA,Inferred_Income,Inferred_Lifestage_Group
0,34307050.0,,,United States,New Mexico,33.949081,-104.184361,Placitas,0.0,0.0,...,Age <55,High School Grad,"WC, Service, Mix",Low,55.0,2.0,2.0,1.0,1.0,7.0
1,34255501.0,,,United States,New Mexico,33.949081,-104.184361,Placitas,0.0,0.0,...,Age <55,High School Grad,"WC, Service, Mix",Low,55.0,2.0,2.0,1.0,1.0,7.0
2,34007127.0,8152855.0,11435875.0,United States,New Mexico,33.949081,-104.184361,Placitas,0.0,0.0,...,Age <55,High School Grad,"WC, Service, Mix",Low,55.0,2.0,2.0,1.0,1.0,7.0
3,34304914.0,,,United States,Georgia,32.723359,-82.298483,Savannah,0.0,0.0,...,Age <55,High School Grad,"WC, Service, Mix",Low,55.0,2.0,2.0,1.0,1.0,7.0
4,34253365.0,,,United States,Georgia,32.723359,-82.298483,Savannah,0.0,0.0,...,Age <55,High School Grad,"WC, Service, Mix",Low,55.0,2.0,2.0,1.0,1.0,7.0


## Step 4. Prepare Our Data

In [11]:
##Partition Our DAta
sess.sampling.stratified(
  table={"caslib":"Public", "name":"360DiscoverData", "groupBy":"Goal_SiteConversion_Ind"},
  output={"casOut":{"name":"PartitionedData", "replace":True}, "copyVars":"ALL"},
  samppct=70,
  partind=True
)


castbl_train ={"name":"PartitionedData", "where":"strip(put(_partind_, best.))='0'"}
castbl_test = {"name":"PartitionedData", "where":"strip(put(_partind_, best.))='1'"}

NOTE: Stratified sampling is in effect.
NOTE: Using SEED=1349373212 for sampling.


Unnamed: 0,casLib,Name,Label,Rows,Columns,casTable
0,CASUSER(alex),PartitionedData,,51549,82,"CASTable('PartitionedData', caslib='CASUSER(al..."

Unnamed: 0,ByGrpID,Goal_SiteConversion_Ind,NObs,NSamp
0,0,No,46611,32628
1,1,Yes,4938,3457


In [12]:
##Define Inputs and Outputs
target = "Goal_SiteConversion_Ind"
features = data.columns[8:28]

## Step 5. Create Predictive Model

In [38]:
# Train the gradient boosting model
sess.decisionTree.gbtreeTrain(
  table = castbl_train,
  inputs=list(features),
  target=target,
  nTree=10,
  nBins=20,
  maxLevel=6,
  varImp=True,
  missing="USEINSEARCH",
  casOut={"name":"GB_model", "replace":True},
  savestate={"name":'GB_model_astore', "replace":True}
)

#Score the gradient boosting model
gbt_score_obj = sess.decisionTree.gbtreeScore(
   table        = castbl_train,
   model        = "GB_model",
   casout       = {"name":"gbt_scored_train", "replace":True},
   copyVars     = target,
   encodename   = True,
   assessonerow = True
)

sess.percentile.assess(
   table    = {"name":"gbt_scored_train"},
   inputs = 'p_' + target + 'Yes',
   casout = {"name":"gbt_assess_train","replace":True},
   response = target,
   event = "Yes"
)

# Test Data
#Score the gradient boosting model
gbt_score_obj = sess.decisionTree.gbtreeScore(
   table        = castbl_test,
   model        = "GB_model",
   casout       = {"name":"gbt_scored_test","replace":True},
   copyVars     = target,
   encodename   = True,
   assessonerow = True
)

# generate performance metrics on test data
sess.percentile.assess(
   table    = {"name":"gbt_scored_test"},
   inputs = 'p_' + target + 'Yes',
   casout = {"name":"gbt_assess_test","replace":True},
   response = target,
   event = "Yes"
)

# grab ROC metric for train data
gbt_assess_ROC_train = sess.CASTable(name="gbt_assess_train_ROC", replace = True)
gbt_assess_ROC_train = gbt_assess_ROC_train.to_frame()
gbt_assess_ROC_train['Model'] = 'Gradient Boosting'
gbt_assess_ROC_train['_DataRole_'] = 'TRAIN'
gbt_assess_ROC_train['_PartInd_'] = '1'

# grab ROC metric for test data
gbt_assess_ROC_test = sess.CASTable(name="gbt_assess_test_ROC", replace = True)
gbt_assess_ROC_test = gbt_assess_ROC_test.to_frame()
gbt_assess_ROC_test['Model'] = 'Gradient Boosting'
gbt_assess_ROC_test['_DataRole_'] = 'TEST'
gbt_assess_ROC_test['_PartInd_'] = '2'

# load a templace that has a proper ROC metric format that Model Manager will recognize
with open('data/dmcas_roc_template_class.json') as json_file:
    roc_json = json.load(json_file)
    
import copy

# populate the format with Train and Test data ROC metrics
dataMaps = list()
for gbt_assess_ROC in [gbt_assess_ROC_train, gbt_assess_ROC_test]:
    for i in range(len(gbt_assess_ROC)):
        temp = roc_json['data'][0]
        temp['dataMap'].pop('_formattedPartition_', None)
        temp['dataMap']['_ACC_'] = str(gbt_assess_ROC['_ACC_'][i])
        temp['dataMap']['_TP_'] = str(int(gbt_assess_ROC['_TP_'][i]))
        temp['dataMap']['_OneMinusSpecificity_'] = str(gbt_assess_ROC['_Specificity_'][i])
        temp['dataMap']['_Column_'] = str(gbt_assess_ROC['_Column_'][i])
        temp['dataMap']['_TN_'] = str(int(gbt_assess_ROC['_TN_'][i]))
        temp['dataMap']['_KS2_'] = str(gbt_assess_ROC['_KS2_'][i])
        temp['dataMap']['_FPR_'] = str(gbt_assess_ROC['_FPR_'][i])
        temp['dataMap']['_DataRole_'] = gbt_assess_ROC['_DataRole_'][i]
        temp['dataMap']['_FDR_'] = str(gbt_assess_ROC['_FDR_'][i])
        temp['dataMap']['_MiscEvent_'] = str(gbt_assess_ROC['_MiscEvent_'][i])
        temp['dataMap']['_FN_'] = str(int(gbt_assess_ROC['_FN_'][i]))
        temp['dataMap']['_KS_'] = str(gbt_assess_ROC['_KS_'][i])
        temp['dataMap']['_Sensitivity_'] = str(gbt_assess_ROC['_Sensitivity_'][i])
        temp['dataMap']['_Event_'] = str(gbt_assess_ROC['_Event_'][i])
        temp['dataMap']['_F1_'] = str(gbt_assess_ROC['_F1_'][i])
        temp['dataMap']['_FP_'] = str(int(gbt_assess_ROC['_FP_'][i]))
        temp['dataMap']['_Cutoff_'] = str(gbt_assess_ROC['_Cutoff_'][i])
        temp['dataMap']['_Specificity_'] = str(1+gbt_assess_ROC['_Specificity_'][i])
        temp['dataMap']['_FHALF_'] = str(gbt_assess_ROC['_FHALF_'][i])
        temp['dataMap']['_PartInd_'] = gbt_assess_ROC['_PartInd_'][i]
        dataMaps.append(copy.deepcopy(temp))
roc_json['data'] = dataMaps

# save the file locally
with open(MODEL_DIR + '/dmcas_roc.json','w') as json_file:
    json.dump(roc_json, json_file)

NOTE: 414306 bytes were written to the table "GB_model_astore" in the caslib "CASUSER(alex)".


## Step 6. Use SASCTL Package to register model to model Project, along with associated files

In [39]:
# Use sasctl to connect to SAS
Session('yoururlhere.com', 'username', '*****')

<sasctl.core.Session at 0x199b2dfcc50>

In [43]:
# Connect to SAS Model Manager, load saved gradient boosting model, create project and register the model
# ship newly generated ROC metric file to Model Manager
astore = sess.CASTable('GB_model_astore')
model = register_model(astore, name = 'GB_model', project = 'Marketing - Conversion Propensity')
# add roc chart
file = open(MODEL_DIR + '/' + 'dmcas_roc.json', 'rb')
mr.add_model_content(model='GB_model', file=file, name='dmcas_roc.json', role='')
file.close()

NOTE: Added action set 'astore'.
NOTE: 414306 bytes were downloaded from the table "GB_MODEL_ASTORE" in the caslib "CASUSER(alex)".
NOTE: Cloud Analytic Services saved the file _C71AF226754840F9953D08F63.sashdat in caslib ModelStore.
