In [1]:
import os
from datetime import datetime

import joblib
import numpy as np
import pandas as pd

In [21]:
from arthurai import ArthurAI
from arthurai.common.constants import InputType, OutputType, Stage, ValueType
from arthurai.core.attributes import AttributeCategory, AttributeBin

## Set up connection to API

In [3]:
URL = "app.arthur.ai"
# Read the API key from the environment so it never has to be pasted into
# (and saved with) the notebook; falls back to the empty placeholder.
ACCESS_KEY = os.environ.get("ARTHUR_API_KEY", "")

# client handle used by every later cell to talk to the Arthur API
connection = ArthurAI(url=URL, access_key=ACCESS_KEY, client_version=3)

In [4]:
# Create a local model object for a batch, tabular, multiclass model
# identified by the partner model id "MEPS_drift".
model = connection.model(
    partner_model_id="MEPS_drift",
    input_type=InputType.Tabular,
    output_type=OutputType.Multiclass,
    is_batch=True,
)

# If this model has already been created, uncomment the line below to fetch
# the existing model object instead:
# model = connection.get_model('MEPS_drift', id_type='partner_model_id')

## Set up ArthurModel
Everything under this header is *necessary* for an `ArthurModel` to be created; additional functionality is not 
possible until after `model.save()` has been successfully called.

*Some context about this dataset:*

- label: `UTILIZATION`, where 1: ≥10 visits, 0: <10 visits

- protected attribute: `RACE`, where 1: `White`, 0: `Non-White`

In [5]:
# all_data contains the X's (model inputs) and the Y's (predicted probabilities p_0/p_1 and label gt)
all_data = pd.read_parquet('data/fulldata_train.parquet')

In [6]:
# preview the first five rows of all_data to see which columns it holds
all_data.head()

Unnamed: 0,AGE,RACE,PCS42,MCS42,K6SUM42,REGION1,REGION2,REGION3,REGION4,SEX1,...,POVCAT2,POVCAT3,POVCAT4,POVCAT5,INSCOV1,INSCOV2,INSCOV3,p_0,p_1,gt
0,54.0,0.0,34.7,57.25,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.908582,0.091418,0.0
1,25.0,0.0,50.48,57.54,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.954603,0.045397,1.0
2,37.0,0.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.88655,0.11345,0.0
3,29.0,0.0,-1.0,-1.0,-1.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.922157,0.077843,0.0
4,84.0,1.0,42.1,61.02,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.978833,0.021167,1.0


In [7]:
# names of the last three columns of all_data
list(all_data.columns[-3:])

In [8]:
# Register the training features on the model as ModelPipelineInput attributes.
# Attribute names may contain only letters, numbers, and underscores, and
# cannot begin with a number.
non_feature_columns = ['RACE', 'p_0', 'p_1', 'gt']  # protected attribute, predictions, label
train_x = all_data.drop(columns=non_feature_columns)
model.from_dataframe(train_x, Stage.ModelPipelineInput)

In [9]:
# Each predicted attribute pred_<level>_utilization (the model's output) is
# paired with its ground truth attribute gt_<level>_utilization (the true label).
pred_to_ground_truth_map = {
    f"pred_{level}_utilization": f"gt_{level}_utilization"
    for level in ("high", "low")
}

# register the predicted and ground truth attributes on the model;
# 'pred_high_utilization' is passed as the positive predicted attribute
model.add_binary_classifier_output_attributes('pred_high_utilization', pred_to_ground_truth_map)

{'pred_high_utilization': ArthurAttribute(name='pred_high_utilization', value_type='FLOAT', stage='PREDICTED_VALUE', id=None, label=None, position=0, categorical=False, min_range=0, max_range=1, monitor_for_bias=False, categories=None, bins=None, is_unique=False, is_positive_predicted_attribute=True, attribute_link='gt_high_utilization'),
 'gt_high_utilization': ArthurAttribute(name='gt_high_utilization', value_type='INTEGER', stage='GROUND_TRUTH', id=None, label=None, position=0, categorical=True, min_range=None, max_range=None, monitor_for_bias=False, categories=[AttributeCategory(value='0', label=None), AttributeCategory(value='1', label=None)], bins=None, is_unique=False, is_positive_predicted_attribute=False, attribute_link='pred_high_utilization'),
 'pred_low_utilization': ArthurAttribute(name='pred_low_utilization', value_type='FLOAT', stage='PREDICTED_VALUE', id=None, label=None, position=1, categorical=False, min_range=0, max_range=1, monitor_for_bias=False, categories=None, b

In [10]:
# Bias monitoring has to be configured before the model is saved.
# To be monitored for bias, 'RACE' must be an int (or take on discrete string values).
race_categories = [AttributeCategory(value=1), AttributeCategory(value=0)]

model.add_attribute(
    name='RACE',
    stage=Stage.NonInputData,
    value_type=ValueType.Integer,
    categorical=True,
    categories=race_categories,
    monitor_for_bias=True,
)

In [11]:
# Review every attribute registered on the model so far. Even though the "real"
# training data was passed in, the model currently holds only the properties of
# the attributes, not the datapoints themselves. Reference data (needed for
# data drift detection) has to be set separately later.
model.review()

Unnamed: 0,name,stage,value_type,categorical,is_unique,categories,bins,range,monitor_for_bias
0,AGE,PIPELINE_INPUT,FLOAT,False,False,[],,"[0.0, 85.0]",False
1,PCS42,PIPELINE_INPUT,FLOAT,False,False,[],,"[-9.0, 62.56]",False
2,MCS42,PIPELINE_INPUT,FLOAT,False,False,[],,"[-9.0, 65.95]",False
3,K6SUM42,PIPELINE_INPUT,FLOAT,False,False,[],,"[-9.0, 15.0]",False
4,REGION1,PIPELINE_INPUT,FLOAT,True,False,"[{value: 0.0}, {value: 1.0}]",,"[None, None]",False
...,...,...,...,...,...,...,...,...,...
137,gt_high_utilization,GROUND_TRUTH,INTEGER,True,False,"[{value: 0}, {value: 1}]",,"[None, None]",False
138,pred_high_utilization,PREDICTED_VALUE,FLOAT,False,False,[],,"[0, 1]",False
139,gt_low_utilization,GROUND_TRUTH,INTEGER,True,False,"[{value: 0}, {value: 1}]",,"[None, None]",False
140,pred_low_utilization,PREDICTED_VALUE,FLOAT,False,False,[],,"[0, 1]",False


In [12]:
# Your model is not uploaded to Arthur until you call model.save().
# The value displayed below the cell is what save() returned.
model.save()
# model.update()  # alternative call left disabled; presumably for a model that is already saved - confirm

'2144927e-3906-4724-9a62-d9062ef06ee8'

## Adding additional functionality

Setting reference data and monitoring for bias adds to the functionality of the `ArthurModel` that you've created. 

### Setting reference data

Now that we've saved the model, we can set reference data.
The reference DataFrame must have a column for each `ModelPipelineInput` attribute and each `NonInputData` attribute. Optionally, it 
can contain predicted value and ground truth columns; this will enable the calculation of data drift 
on output attributes. In this model, our ground truth columns are `gt_high_utilization` and `gt_low_utilization`,
and our predicted value columns are `pred_high_utilization` and `pred_low_utilization`. 

We need to rename and update the columns in `all_data` to match these. 

In [13]:
# Reshape the raw output columns ['p_0', 'p_1', 'gt'] into the attribute names
# registered on the model.

# one-hot encode the label into one integer column per class
gt = (
    pd.get_dummies(all_data['gt'], prefix='gt', dtype='int')
    .rename(columns={'gt_0.0': 'gt_low_utilization', 'gt_1.0': 'gt_high_utilization'})
)

# rename the probabilities, swap the raw label for its one-hot columns,
# and store RACE as an integer
all_data = (
    all_data
    .rename(columns={'p_0': 'pred_low_utilization', 'p_1': 'pred_high_utilization'})
    .drop(columns=['gt'])
    .merge(gt, left_index=True, right_index=True)
    .astype({'RACE': 'int64'})
)

In [14]:
# allow up to 200 rows to be displayed so the dtype listing is not truncated
pd.options.display.max_rows = 200
all_data.dtypes

AGE                      float64
RACE                       int64
PCS42                    float64
MCS42                    float64
K6SUM42                  float64
REGION1                  float64
REGION2                  float64
REGION3                  float64
REGION4                  float64
SEX1                     float64
SEX2                     float64
MARRY1                   float64
MARRY2                   float64
MARRY3                   float64
MARRY4                   float64
MARRY5                   float64
MARRY6                   float64
MARRY7                   float64
MARRY8                   float64
MARRY9                   float64
MARRY10                  float64
FTSTU01                  float64
FTSTU1                   float64
FTSTU2                   float64
FTSTU3                   float64
ACTDTY1                  float64
ACTDTY2                  float64
ACTDTY3                  float64
ACTDTY4                  float64
HONRDC1                  float64
HONRDC2   

In [15]:
# Upload the prepared frame as the model's reference data.
# note: need to explicitly specify the data= argument
res = model.set_reference_data(data=all_data)
# print the result returned by the upload (success/failure counts appear in the output below)
print(res)

({'counts': {'success': 138, 'failure': 0, 'total': 138}, 'failures': [[]]}, {'dataset_close_result': 'success'})


### Sending batches of inferences

We do not need to load/expose the actual predictive model to send inferences to Arthur; everything goes through the `model` object created above. `x_shift` is a directory holding batched data. Each of the parquet files is a dataframe with the following columns:

- sensitive attribute, `RACE`
- all attributes used for training
- `p_0`: the predicted probability of this example being in the 0 class - *given by your model*
- `p_1`: the predicted probability of this example being in the 1 class - *given by your model*
- `gt`: the ground truth label. 

In [16]:
# list the batch files in the x_shift directory
%ls data/x_shift 

fulldata_0.parquet  fulldata_2.parquet  fulldata_4.parquet  fulldata_6.parquet
fulldata_1.parquet  fulldata_3.parquet  fulldata_5.parquet  fulldata_7.parquet


##### batch inference format
(df option)
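Your batch inferences must contain the prediction columns registered on the model earlier
(`pred_low_utilization` and `pred_high_utilization`); if you don't have ground truth labels
at this time, you don't need to send them now.
When sending batch inferences, you must also supply the following values (in the cells below, `batch_id` is passed as an argument to `send_batch_inferences`, and the other two are added as columns): 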
- `batch_id` - string
- `partner_inference_id`- string
- `inference_timestamp` - datetime.datetime

In [17]:
for i in range(4):  # sending inferences without ground truth
    alldata = pd.read_parquet('data/x_shift/fulldata_' + str(i) + '.parquet')

    # rename the model outputs to the registered prediction attributes; the raw
    # label column is dropped because ground truth is not part of this upload
    tosend = (
        alldata
        .rename(columns={'p_0': 'pred_low_utilization', 'p_1': 'pred_high_utilization'})
        .drop(columns=['gt'])
    )
    tosend['RACE'] = tosend['RACE'].astype(int)

    # every inference needs its own id: "<batch number>_<row position>" is unique
    # within and across batches, and can be rebuilt from the same file later
    # when ground truth has to be attached to these rows
    tosend['partner_inference_id'] = [f'{i}_{row}' for row in range(len(tosend))]
    # one shared upload time for the whole batch
    tosend['inference_timestamp'] = [datetime.utcnow()] * len(tosend)

    res = model.send_batch_inferences(data=tosend, batch_id=str(i))
    print(res)

({'counts': {'success': 200, 'failure': 0, 'total': 200}, 'failures': [[]]}, {'dataset_close_result': 'success'})
({'counts': {'success': 192, 'failure': 0, 'total': 192}, 'failures': [[]]}, {'dataset_close_result': 'success'})
({'counts': {'success': 187, 'failure': 0, 'total': 187}, 'failures': [[]]}, {'dataset_close_result': 'success'})
({'counts': {'success': 184, 'failure': 0, 'total': 184}, 'failures': [[]]}, {'dataset_close_result': 'success'})


In [18]:
for i in range(4):  # adding ground truth later
    alldata = pd.read_parquet('data/x_shift/fulldata_' + str(i) + '.parquet')

    # one-hot encode the label into the two registered ground truth attributes
    tosend = pd.get_dummies(alldata['gt'], prefix='gt', dtype='int64')
    tosend = tosend.rename(columns={'gt_0.0': 'gt_low_utilization', 'gt_1.0': 'gt_high_utilization'})
    # a batch that contains only one class would otherwise lack a column;
    # add any missing class column filled with 0
    tosend = tosend.reindex(columns=['gt_low_utilization', 'gt_high_utilization'], fill_value=0)

    # ground truth is attached to an inference through its partner_inference_id,
    # so the ids must be the ones the inferences were sent with. They are rebuilt
    # here as "<batch number>_<row position>" from the same file; one random id
    # shared by the whole batch could never line up with individual inferences.
    tosend['partner_inference_id'] = [f'{i}_{row}' for row in range(len(tosend))]
    tosend['ground_truth_timestamp'] = [datetime.utcnow()] * len(tosend)

    res = model.send_batch_ground_truths(data=tosend)
    print(res)

{'counts': {'success': 200, 'failure': 0, 'total': 200}, 'failures': [[]]}
{'counts': {'success': 192, 'failure': 0, 'total': 192}, 'failures': [[]]}
{'counts': {'success': 187, 'failure': 0, 'total': 187}, 'failures': [[]]}
{'counts': {'success': 184, 'failure': 0, 'total': 184}, 'failures': [[]]}


In [19]:
for i in range(4):  # adding gt and infs at the same time
    batch_number = i + 4  # this cell covers the files fulldata_4 .. fulldata_7
    alldata = pd.read_parquet('data/x_shift/fulldata_' + str(batch_number) + '.parquet')

    # rename the model outputs to the registered prediction attributes and drop
    # the raw label, which is replaced by its one-hot columns below
    tosend = (
        alldata
        .rename(columns={'p_0': 'pred_low_utilization', 'p_1': 'pred_high_utilization'})
        .drop(columns=['gt'])
    )
    tosend['RACE'] = tosend['RACE'].astype(int)

    # one-hot encode the label as integers, so the ground truth columns have an
    # integer dtype rather than get_dummies' default dtype
    gts = pd.get_dummies(alldata['gt'], prefix='gt', dtype='int64')
    gts = gts.rename(columns={'gt_0.0': 'gt_low_utilization', 'gt_1.0': 'gt_high_utilization'})

    tosend = tosend.merge(gts, left_index=True, right_index=True)

    # every inference needs its own id: "<batch number>_<row position>" is
    # unique within and across batches, unlike one random id per batch
    tosend['partner_inference_id'] = [f'{batch_number}_{row}' for row in range(len(tosend))]
    tosend['inference_timestamp'] = [datetime.utcnow()] * len(tosend)

    res = model.send_batch_inferences(data=tosend, batch_id=str(batch_number))
    print(res)

({'counts': {'success': 195, 'failure': 0, 'total': 195}, 'failures': [[]]}, {'dataset_close_result': 'success'})
({'counts': {'success': 214, 'failure': 0, 'total': 214}, 'failures': [[]]}, {'dataset_close_result': 'success'})
({'counts': {'success': 209, 'failure': 0, 'total': 209}, 'failures': [[]]}, {'dataset_close_result': 'success'})
({'counts': {'success': 219, 'failure': 0, 'total': 219}, 'failures': [[]]}, {'dataset_close_result': 'success'})
