In [1]:
import json
import os
from datetime import datetime, timezone, timedelta

import pandas as pd
import requests
from pandas.io.json import json_normalize

In [2]:
# connectivity values
# The API token is a credential, so prefer supplying it through the
# DATAROBOT_API_TOKEN environment variable rather than saving it in the notebook.
# The placeholder below is only used when that variable is unset or empty.
DR_API_TOKEN = os.environ.get('DATAROBOT_API_TOKEN') or 'FF____INSERT_API_TOKEN_HERE_______TlU9'
DR_MODELING_ENDPOINT = 'https://app.datarobot.com'
# Headers sent with every request: JSON content type plus token authorization.
DR_MODELING_HEADERS = {
    'Content-Type': 'application/json',
    'Authorization': 'token %s' % DR_API_TOKEN,
}

In [3]:
# deployment retrieval settings
DEPLOYMENT_ID = '5c341c008b7d654f'
DRIFT_METRIC = 'psi'  # psi (Population Stability Index) is the default metric
PAST_DAYS_TO_RETRIEVE = 120

# Start of the retrieval window: the current UTC time truncated to the top of
# the hour, moved back by the look-back period, rendered as an ISO 8601 string.
_current_utc_hour = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0)
_lookback = timedelta(days=PAST_DAYS_TO_RETRIEVE)
START_TM = (_current_utc_hour - _lookback).isoformat()

In [4]:
# get drift data from the deployment
# NOTE(review): a single page of at most `limit` features is requested; if the
# deployment has more features than that, the rest are presumably on further
# pages and are not fetched here -- confirm against the API documentation.
params = {
    'limit': 100,
    'metric': DRIFT_METRIC,
    'start': START_TM,
}

response = requests.get(
    url=DR_MODELING_ENDPOINT + '/api/v2/deployments/' + DEPLOYMENT_ID + '/featureDrift/',
    headers=DR_MODELING_HEADERS,
    params=params,
    timeout=60,  # requests has no default timeout; do not let the kernel hang forever
)

if response.status_code != 200:
    # Stop here instead of only printing: continuing would leave `df_features`
    # and `sample_size` undefined (or stale from a previous run), so the
    # alerting cell would either crash or evaluate out-of-date drift data.
    raise RuntimeError(
        'Request failed; http error {code}: {content}'.format(
            code=response.status_code, content=response.content
        )
    )

drift_records = response.json()['data']

if drift_records:
    # pd.json_normalize is the supported top-level entry point (pandas >= 1.0);
    # the pd.io.json.json_normalize alias is deprecated.
    df_drift_raw = pd.json_normalize(drift_records)
    # The sample size of the first returned feature is used as the sample size
    # for the whole response.
    sample_size = df_drift_raw['sampleSize'].iloc[0]
    df_features = df_drift_raw[['name', 'featureImpact', 'driftScore']]
else:
    # No drift records returned: expose an empty frame with the expected
    # columns and a zero sample size so downstream cells still run.
    sample_size = 0
    df_features = pd.DataFrame(columns=['name', 'featureImpact', 'driftScore'])

In [5]:
# custom alerting rules
MIN_SAMPLE_SIZE = 250            # minimum sample size before any alert is considered
PSI_THRESHOLD = 0.2              # drift score at or above this counts as drifted
EXCESSIVE_PSI_THRESHOLD = 0.8    # drift score at or above this counts as excessive
IGNORE_FEATURES = ['LISTING_TERM']  # features that never contribute to an alert

# Features at or above the drift threshold, excluding the ignored ones.
_is_drifted = df_features['driftScore'] >= PSI_THRESHOLD
_is_ignored = df_features['name'].isin(IGNORE_FEATURES)
df_psi_drifted_features = df_features.loc[_is_drifted & ~_is_ignored]

# Collect one message line per rule that fires; rules are only evaluated when
# the sample size is large enough.
_alert_lines = []
if not sample_size < MIN_SAMPLE_SIZE:
    if len(df_psi_drifted_features) >= 2:
        _alert_lines.append(
            '\nAlert: 2 or more features have exceeded a threshold of ' + str(PSI_THRESHOLD)
        )
    if (df_psi_drifted_features['driftScore'] >= EXCESSIVE_PSI_THRESHOLD).any():
        _alert_lines.append(
            '\nAlert: 1 or more features have exceeded an excessive threshold of '
            + str(EXCESSIVE_PSI_THRESHOLD)
        )

alert = 1 if _alert_lines else 0
alert_message = ''.join(
    [DR_MODELING_ENDPOINT, '/deployments/', DEPLOYMENT_ID, '/data-drift\n'] + _alert_lines
)

if alert == 1:
    # Append every retrieved feature (ignored ones included), highest drift first.
    _ranked_features = df_features.sort_values(by='driftScore', ascending=False)
    alert_message = alert_message + '\n\n' + str(_ranked_features)
    print(alert_message)
    # take action, eg. send e-mail, kick off new project for training a model replacement, etc.

https://app.datarobot.com/deployments/5c3438b3a5151c008b7d654f/data-drift

Alert: 2 or more features have exceeded a threshold of 0.2
Alert: 1 or more features have exceeded an excessive threshold of 0.8

                                           name  featureImpact  driftScore
9                                    OCCUPATION       0.338381  321.408581
6                             DTI_WPROSPER_LOAN       0.452505    1.939436
1                                  LISTING_TERM       0.845353    0.368711
4                                 CRED_TU_AT34A       0.480062    0.222828
0                       LISTING_MONTHLY_PAYMENT       1.000000    0.190004
7                                 CRED_TU_RE01S       0.398086    0.139175
8                                 CRED_TU_RE29S       0.387202    0.091992
3  RE33S_BALANCE_OWED_ON_ALL_REVOLVING_ACCOUNTS       0.621121    0.080038
5                                 CRED_TU_AT09S       0.477660    0.078290
2                         STATED_MONTHLY_INCO