## Package installation

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from IPython.display import clear_output
!pip install scikit-learn==0.20.2 --user -q
!pip install --upgrade numpy --no-cache | tail -n 1
!pip install --upgrade SciPy --no-cache | tail -n 1
!pip install lime --no-cache | tail -n 1
!pip install ibm_watson_openscale==3.0.3 | tail -n 1
!pip install ibm_wos_utils | tail -n 1
clear_output(wait=False)

In [3]:
!pip install --upgrade pyspark==3.2.0 --no-cache | tail -n 1

#If you are running this notebook in non IBM Watson Studio env then uncomment the below pip statements and run it
#!pip install --upgrade pandas==1.2.3 --no-cache | tail -n 1 
#!pip install --upgrade requests==2.23 --no-cache | tail -n 1
#!pip install numpy==1.20.1 --no-cache | tail -n 1
#!pip install SciPy --no-cache | tail -n 1
#!pip install lime --no-cache | tail -n 1

!pip install --upgrade ibm-watson-machine-learning --user | tail -n 1
!pip install --upgrade ibm-watson-openscale --no-cache | tail -n 1

Successfully installed PyJWT-2.4.0 ibm-cloud-sdk-core-3.10.1 ibm-watson-openscale-3.0.23


### Action: restart the kernel!

## Configure credentials

- WOS_CREDENTIALS (CP4D)
- WML_CREDENTIALS (CP4D)
- DATABASE_CREDENTIALS (DB2 on CP4D or Cloud Object Storage (COS))
- SCHEMA_NAME

In [1]:
# After adding each appropriate value, hit enter to add the next value
from IPython.display import clear_output
import os

WOS_CREDENTIALS = {}
WOS_CREDENTIALS["username"] = input("Username : ")
WOS_CREDENTIALS["password"] = input("Password : ")
WOS_CREDENTIALS["url"] = os.environ['RUNTIME_ENV_APSX_URL']

#Clear output to hide sensitive information from being displayed
clear_output(wait=False)

In [2]:
import json
import requests
import base64
from requests.auth import HTTPBasicAuth
import time
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")

def generate_access_token():
    headers={}
    headers["Accept"] = "application/json"
    auth = HTTPBasicAuth(WOS_CREDENTIALS["username"], WOS_CREDENTIALS["password"])
    
    ICP_TOKEN_URL= WOS_CREDENTIALS["url"] + "/v1/preauth/validateAuth"
    
    response = requests.get(ICP_TOKEN_URL, headers=headers, auth=auth, verify=False)
    json_data = response.json()
    icp_access_token = json_data['accessToken']
    return icp_access_token

token = generate_access_token()

WOS_CREDENTIALS["token"] = token



In [3]:
import ibm_watson_openscale
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator

DATA_MART_ID = '00000000-0000-0000-0000-000000000000'

#authenticate user
authenticator = CloudPakForDataAuthenticator(
        url=WOS_CREDENTIALS['url'],
        username=WOS_CREDENTIALS['username'],
        password=WOS_CREDENTIALS['password'],
        disable_ssl_verification=True
    )

#connect
wos_client = ibm_watson_openscale.APIClient(authenticator=authenticator, service_url=WOS_CREDENTIALS['url'], service_instance_id=DATA_MART_ID)
print(wos_client.version)



3.0.23


In [4]:
from ibm_watson_machine_learning import APIClient
import os

token = generate_access_token()

wml_credentials = {
   "token": token,
   "instance_id" : "openshift",
   "url": os.environ['RUNTIME_ENV_APSX_URL'],
   "version": "4.5"
}

wml_client = APIClient(wml_credentials)



# Training Data

In [5]:
import pandas as pd
train = pd.read_csv('/project_data/data_asset/train_xgb.csv')
train = train.iloc[: , 1:]
train.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,6.7,2.5,5.8,1.8,virginica
1,5.1,3.7,1.5,0.4,setosa
2,5.0,3.2,1.2,0.2,setosa
3,7.2,3.0,5.8,1.6,virginica
4,5.1,3.8,1.5,0.3,setosa


# Configure OpenScale <a name="openscale"></a>

The notebook will now import the necessary libraries and set up a Python OpenScale client.

In [6]:
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import APIClient

from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

In [7]:
authenticator = CloudPakForDataAuthenticator(
        url=WOS_CREDENTIALS['url'],
        username=WOS_CREDENTIALS['username'],
        password=WOS_CREDENTIALS['password'],
        disable_ssl_verification=True
    )
instance_id='00000000-0000-0000-0000-000000000000' #Datamart id

wos_client = APIClient(service_url=WOS_CREDENTIALS['url'],authenticator=authenticator,service_instance_id = instance_id)
wos_client.version

'3.0.23'

## Create datamart

### Set up datamart

Watson OpenScale uses a database to store payload logs and calculated metrics. If database credentials were supplied, the datamart will be created there unless there is an existing datamart and the KEEP_MY_INTERNAL_POSTGRES variable is set to True. If an OpenScale datamart exists in Db2 or PostgreSQL, the existing datamart will be used and no data will be overwritten.

Prior instances of the German Credit model will be removed from OpenScale monitoring.

In [8]:
wos_client.data_marts.show()

0,1,2,3,4,5
AIOSFASTPATHICP-00000000-0000-0000-0000-000000000000,Data Mart created by OpenScale ExpressPath,False,active,2022-08-08 20:50:24.802000+00:00,00000000-0000-0000-0000-000000000000


In [9]:
data_marts = wos_client.data_marts.list().result.data_marts
if len(data_marts) == 0:
    if DB_CREDENTIALS is not None:
        if SCHEMA_NAME is None: 
            print("Please specify the SCHEMA_NAME and rerun the cell")

        print('Setting up external datamart')
        added_data_mart_result = wos_client.data_marts.add(
                background_mode=False,
                name="WOS Data Mart",
                description="Data Mart created by WOS tutorial notebook",
                database_configuration=DatabaseConfigurationRequest(
                  database_type=DatabaseType.DB2,
                    credentials=PrimaryStorageCredentialsLong(
                        hostname=DATABASE_CREDENTIALS['hostname'],
                        username=DATABASE_CREDENTIALS['username'],
                        password=DATABASE_CREDENTIALS['password'],
                        db=DATABASE_CREDENTIALS['database'],
                        port=DATABASE_CREDENTIALS['port'],
                        ssl=DATABASE_CREDENTIALS['ssl'],
                        sslmode=DATABASE_CREDENTIALS['sslmode'],
                        certificate_base64=DATABASE_CREDENTIALS['certificate_base64']
                    ),
                    location=LocationSchemaName(
                        schema_name= SCHEMA_NAME
                    )
                )
             ).result
    else:
        print('Setting up internal datamart')
        added_data_mart_result = wos_client.data_marts.add(
                background_mode=False,
                name="WOS Data Mart",
                description="Data Mart created by WOS tutorial notebook", 
                internal_database = True).result
        
    data_mart_id = added_data_mart_result.metadata.id
    
else:
    data_mart_id=data_marts[0].metadata.id
    print('Using existing datamart {}'.format(data_mart_id))

Using existing datamart 00000000-0000-0000-0000-000000000000


## Remove existing service provider connected with used WML instance.
Multiple service providers for the same engine instance are avaiable in Watson OpenScale. To avoid multiple service providers of used WML instance in the tutorial notebook the following code deletes existing service provder(s) and then adds new one.

In [10]:
SERVICE_PROVIDER_NAME = "WML Function"
SERVICE_PROVIDER_DESCRIPTION = "WML AI function -  WOS notebook."

In [11]:
service_providers = wos_client.service_providers.list().result.service_providers
for service_provider in service_providers:
    service_instance_name = service_provider.entity.name
    if service_instance_name == SERVICE_PROVIDER_NAME:
        service_provider_id = service_provider.metadata.id
        wos_client.service_providers.delete(service_provider_id)
        print("Deleted existing service_provider for WML instance: {}".format(service_provider_id))

## Add service provider
Watson OpenScale needs to be bound to the Watson Machine Learning instance to capture payload data into and out of the model.

**Note:** You can bind more than one engine instance if needed by calling `wos_client.service_providers.add` method. Next, you can refer to particular service provider using `service_provider_id`.

In [12]:
space_id = 'd4c5a38c-44e3-473f-a4ff-c43b3f9cd2d2'
d_id = 'bf6bbfce-250f-4101-be63-63fa469d5131'

In [13]:
added_service_provider_result = wos_client.service_providers.add(
        name=SERVICE_PROVIDER_NAME,
        description=SERVICE_PROVIDER_DESCRIPTION,
        service_type=ServiceTypes.WATSON_MACHINE_LEARNING,
        deployment_space_id = space_id,
        operational_space_id = "production",
        credentials=WMLCredentialsCP4D(
            url=None,
            username=None,
            password=None,
            instance_id=None
        ),
        background_mode=False
    ).result
service_provider_id = added_service_provider_result.metadata.id




 Waiting for end of adding service provider 68c5c98e-3355-4d7b-90eb-f6a84eb6a07a 




active

-----------------------------------------------
 Successfully finished adding service provider 
-----------------------------------------------




In [14]:
wos_client.service_providers.show()

0,1,2,3,4,5
99999999-9999-9999-9999-999999999999,active,WML Function,watson_machine_learning,2022-09-12 19:52:02.738000+00:00,68c5c98e-3355-4d7b-90eb-f6a84eb6a07a
,active,IAE,custom_machine_learning,2022-09-11 07:33:58.158000+00:00,f89f7648-7615-447f-bb75-59e9b3201081
99999999-9999-9999-9999-999999999999,active,Credit-Risk-WML-Production-ML-Engine_TAI_demo,watson_machine_learning,2022-08-25 20:28:18.764000+00:00,ae922c99-dc81-445c-8743-cd004b0cad37
99999999-9999-9999-9999-999999999999,active,Credit-Risk-WML-Pre-Production-ML-Engine_TAI_demo,watson_machine_learning,2022-08-25 20:15:10.335000+00:00,b5253aab-f036-4bab-b3ef-becaded6eb38
,active,Walmart Notebook Batch,custom_machine_learning,2022-08-19 06:21:52.192000+00:00,87624e51-9c77-4b4a-8c3a-931dc7221e76
,active,Walmart Batch,custom_machine_learning,2022-08-10 03:40:22.790000+00:00,8113053b-6751-4b55-9aca-937cb20a1a31
99999999-9999-9999-9999-999999999999,active,WOS ExpressPath WML pre_production binding,watson_machine_learning,2022-08-08 20:50:47.196000+00:00,bf0e39f8-d5fc-4728-ad85-7cf1b6dc19b7
99999999-9999-9999-9999-999999999999,active,WOS ExpressPath WML production binding,watson_machine_learning,2022-08-08 20:50:41.616000+00:00,bb9da7e5-88e6-405e-800b-bead653da540


In [15]:
asset_deployment_details = wos_client.service_providers.list_assets(data_mart_id=data_mart_id, service_provider_id=service_provider_id,deployment_id=d_id, deployment_space_id = space_id).result['resources'][0]
asset_deployment_details


{'metadata': {'guid': 'bf6bbfce-250f-4101-be63-63fa469d5131',
  'created_at': '2022-09-12T19:29:41.769Z',
  'modified_at': '2022-09-12T19:29:41.769Z'},
 'entity': {'name': 'iris class 0912',
  'type': 'online',
  'scoring_endpoint': {'url': 'https://ibm-nginx-svc.zen.svc.cluster.local/ml/v4/deployments/bf6bbfce-250f-4101-be63-63fa469d5131/predictions'},
  'asset': {},
  'asset_properties': {}}}

In [16]:
model_asset_details_from_deployment=wos_client.service_providers.get_deployment_asset(data_mart_id=data_mart_id,service_provider_id=service_provider_id,deployment_id=d_id,deployment_space_id=space_id)
model_asset_details_from_deployment


{'metadata': {'guid': 'bf6bbfce-250f-4101-be63-63fa469d5131',
  'created_at': '2022-09-12T19:29:41.769Z',
  'modified_at': '2022-09-12T19:29:41.769Z'},
 'entity': {'name': 'iris class 0912',
  'type': 'online',
  'scoring_endpoint': {'url': 'https://ibm-nginx-svc.zen.svc.cluster.local/ml/v4/deployments/bf6bbfce-250f-4101-be63-63fa469d5131/predictions'},
  'asset': {'asset_id': '851cdca3-d474-4251-b25c-866483e7689e',
   'url': 'https://ibm-nginx-svc.zen.svc.cluster.local/ml/v4/functions/851cdca3-d474-4251-b25c-866483e7689e?space_id=d4c5a38c-44e3-473f-a4ff-c43b3f9cd2d2&version=2020-06-12',
   'name': 'iris class 0912',
   'asset_type': 'function',
   'created_at': '2022-09-12T19:29:39.768Z',
   'modified_at': '2022-09-12T19:29:40.554Z'},
  'asset_properties': {'model_type': 'python',
   'runtime_environment': 'python-3.9'}}}

## Subscriptions

In [17]:
wos_client.subscriptions.show()

0,1,2,3,4,5,6,7,8
fe535d57-32dd-4b14-8958-a5977e7e9860,walmartAutomation2,00000000-0000-0000-0000-000000000000,831cae89-fc1b-4d70-b9b7-e067b690ca94,walmartAutomation2,f89f7648-7615-447f-bb75-59e9b3201081,active,2022-09-12 05:07:29.502000+00:00,6da441c0-164a-4d33-a06d-c8ed0a52b453
ad60e39b-9c69-41fe-b092-d5033a0923f3,walmartAutomation,00000000-0000-0000-0000-000000000000,21718f09-1c8f-40ed-b4d9-a678c7423dce,walmartAutomation,f89f7648-7615-447f-bb75-59e9b3201081,active,2022-09-11 21:26:31.034000+00:00,57edd725-bfc3-4f9f-9e5e-b3ba0214838b
e78ef894-2cf1-448b-a53d-a7423eb69693,walmartAutomation_as_debug,00000000-0000-0000-0000-000000000000,d4f4f672-d5d7-49c0-8409-dc7186732096,walmartAutomation_as_debug,87624e51-9c77-4b4a-8c3a-931dc7221e76,active,2022-09-09 23:39:04.693000+00:00,5e21f09e-2c7e-4070-a481-fff5cc51f291
5a751461-d82d-4ab5-9929-e13c1d5a80a6,walmartAutomation_parquet,00000000-0000-0000-0000-000000000000,45d2f7aa-9cd2-4d5f-a54f-dbd31bbc9683,walmartAutomation_parquet,87624e51-9c77-4b4a-8c3a-931dc7221e76,active,2022-09-09 01:07:59.380000+00:00,170f9773-e6e0-4891-9466-6f7f0980e94f
a7b8b14d-1216-442f-8bb7-1e38e715d983,walmartAutomation_demo_as,00000000-0000-0000-0000-000000000000,0f7a9da4-2744-4528-9a0c-471abc3ba1b2,walmartAutomation_demo_as,87624e51-9c77-4b4a-8c3a-931dc7221e76,active,2022-09-05 17:46:39.116000+00:00,3201b76e-bf73-4153-9d32-2dfb056dea7f
b376cd17-407f-477f-a649-ecc9003025a6,XGB - Credit Risk Pipeline_TAI_demo,00000000-0000-0000-0000-000000000000,9ffcb318-420a-4fb4-9ab8-a84be5f8ff41,prod_xgb_credit_risk_pipeline_deployment_TAI_demo,ae922c99-dc81-445c-8743-cd004b0cad37,active,2022-08-25 20:33:08.565000+00:00,3ae086ec-53f2-413b-90d9-b86577370aa9
f29afad1-3424-4086-b383-9bea3bb90b83,[asset] GCR Batch Test,00000000-0000-0000-0000-000000000000,3d628944-ebb1-47ff-a5de-c107a6f0e206,GCR Batch Test,8113053b-6751-4b55-9aca-937cb20a1a31,active,2022-08-10 11:16:58.746000+00:00,86522670-6250-4e8d-a075-c63fe923a728
9780cc4e-78ad-4334-bbdd-138f8ffcdb21,[asset] GCR Batch Test 1,00000000-0000-0000-0000-000000000000,be036929-9a83-4f84-9598-2a88b020d686,GCR Batch Test 1,8113053b-6751-4b55-9aca-937cb20a1a31,active,2022-08-10 10:33:42.865000+00:00,6aff3f69-7e7b-4d9d-8172-1cc327784f66
a62595e4-1bf2-4bbe-ab83-15688bb340f7,RF - Credit Risk Pipeline_TAI_demo,00000000-0000-0000-0000-000000000000,108e6a7b-4ca6-4eed-93e3-9e789b9192dc,pre_prod_rf_credit_risk_pipeline_deployment_TAI_demo,b5253aab-f036-4bab-b3ef-becaded6eb38,active,2022-08-25 20:15:32.581000+00:00,a862a3b7-c74a-4f0d-8f36-412b6e4f7535
e28c4da1-d44e-4651-a4fc-c5b9311717b2,[asset] gcr1,00000000-0000-0000-0000-000000000000,670eccba-5d2f-4603-9cfa-b81d1198ef13,gcr1,8113053b-6751-4b55-9aca-937cb20a1a31,active,2022-08-22 17:42:44.331000+00:00,8db0c118-2446-4ab5-a503-3dd4d45eca0d


Note: First 10 records were displayed.


In [18]:
#subscriptions = wos_client.subscriptions.list().result.subscriptions
#for subscription in subscriptions:
#    sub_model_id = subscription.entity.asset.asset_id
#    if sub_model_id == d_id:
#        wos_client.subscriptions.delete(subscription.metadata.id)
#        print('Deleted existing subscription for model', sub_model_id)

This code creates the model subscription in OpenScale using the Python client API. Note that we need to provide the model unique identifier, and some information about the model itself.

In [19]:
from ibm_watson_openscale.base_classes.watson_open_scale_v2 import Asset, AssetDeploymentRequest, AssetPropertiesRequest, Target, SparkStruct

training = {'id': 'training schema', 'type': 'struct', 'fields': [
    {'name': 'Sepal.Length', 'type': 'double', 'nullable': False, 'metadata': {}},
    {'name': 'Sepal.Width', 'type': 'double', 'nullable': False, 'metadata': {}},
    {'name': 'Petal.Length', 'type': 'double', 'nullable': False, 'metadata': {}},
    {'name': 'Petal.Width', 'type': 'double', 'nullable': False, 'metadata': {}},
    {'name': 'Species', 'type': 'string', 'nullable': False, 'metadata': {}}]}

subscription_details = wos_client.subscriptions.add(
        data_mart_id=data_mart_id,
        service_provider_id=service_provider_id,
        asset=Asset(
            asset_id=model_asset_details_from_deployment["entity"]["asset"]["asset_id"],
            name=model_asset_details_from_deployment["entity"]["asset"]["name"],
            url=model_asset_details_from_deployment["entity"]["asset"]["url"],
            asset_type=AssetTypes.MODEL,
            input_data_type=InputDataType.STRUCTURED,
            problem_type=ProblemType.MULTICLASS_CLASSIFICATION
        ),
        deployment=AssetDeploymentRequest(
            deployment_id=asset_deployment_details['metadata']['guid'],
            name=asset_deployment_details['entity']['name'],
            deployment_type= DeploymentTypes.ONLINE,
            url=asset_deployment_details['entity']['scoring_endpoint']['url']
        ),
        asset_properties=AssetPropertiesRequest(
            label_column='Species',
            probability_fields=['probability'],
            prediction_field='prediction',
            feature_fields = ["Sepal.Length","Sepal.Width","Petal.Length","Petal.Width"],
            training_data_reference=TrainingDataReference(type='cos',
                                                          location=COSTrainingDataReferenceLocation(bucket = 'cs-xgb-bucket',
                                                                                                    file_name = 'train_xgb.csv'),
                                                          connection=COSTrainingDataReferenceConnection.from_dict({
                                                                        "resource_instance_id": 'crn:v1:bluemix:public:cloud-object-storage:global:a/677fc976cf4b442c912b1e7bfecf48db:ac0f6349-84a7-4d6f-8b1b-41c0ddb480f4::',
                                                                        "url": "https://control.cloud-object-storage.cloud.ibm.com/v2/endpoints",
                                                                        "api_key": 'YP0KB91UMMIWzKhER4ZAt0eB_6dnD6XBngJ4Sw2SjL9v',
                                                                        "iam_url": "https://iam.ng.bluemix.net/oidc/token"})),
            training_data_schema=SparkStruct.from_dict(training)
        ),
        background_mode=False
    ).result
subscription_id = subscription_details.metadata.id
subscription_id




 Waiting for end of adding subscription 2dd86d22-d9fe-47e7-9de1-442b640ac58d 




preparing
active

-------------------------------------------
 Successfully finished adding subscription 
-------------------------------------------




'2dd86d22-d9fe-47e7-9de1-442b640ac58d'

# Drift

## Run Drift Trainer

In order to monitor for Drift, OpenScale needs to train a Drift Model that learns the data used for training along with the predictions. It's important to perform this step before setting up payload logging: when we train the Drift model, we will be making some scoring requests to the deployment and we want to avoid logging those, as they are not "real" scoring requests coming from end users. Therefore we make sure to perform this step before the next section (payload logging setup).

In [20]:
from ibm_wos_utils.drift.drift_trainer import DriftTrainer

In [26]:
def score(data, d_id=d_id):
    import numpy as np
    """
    From the generate_drift_detection_model docs:
    A function that accepts a dataframe with features as columns and returns a tuple of numpy array
    of probabilities array of shape `(n_samples,n_classes)` and numpy array of prediction vector of shape `(n_samples,)`
    """
    feature_columns = ["Sepal.Length","Sepal.Width","Petal.Length","Petal.Width"]
    scoring_payload = {'input_data': [{'features': feature_columns, 'values': data[feature_columns].values.tolist()}]}
    scoring_result = wml_client.deployments.score(d_id, scoring_payload)
    
    probas = np.array([x[1] for x in scoring_result['predictions'][0]['values']])
    preds = np.array([x[0] for x in scoring_result['predictions'][0]['values']])
    return probas, preds

In [27]:
wml_client.set.default_space(space_id)

'SUCCESS'

In [50]:
drift_inputs = {
    'feature_columns': ["Sepal.Length","Sepal.Width","Petal.Length","Petal.Width"],
    'categorical_columns': [],
    'label_column': 'Species',
    'problem_type': 'multiclass'
    
}

drift_trainer = DriftTrainer(train, drift_inputs)
drift_trainer.generate_drift_detection_model(score, batch_size=train.shape[0])
drift_trainer.learn_constraints()
drift_trainer.create_archive()

Scoring training dataframe...: 100%|██████████| 89/89 [00:02<00:00, 31.79rows/s]
Optimising Drift Detection Model...: 100%|██████████| 40/40 [00:24<00:00,  1.63models/s]
Scoring training dataframe...: 100%|██████████| 23/23 [00:02<00:00, 11.05rows/s]
Computing feature stats...: 100%|██████████| 4/4 [00:00<00:00, 134.01features/s]
Learning single feature constraints...: 100%|██████████| 5/5 [00:00<00:00, 401.41constraints/s]
Learning two feature constraints...: 100%|██████████| 9/9 [00:00<00:00, 17.87constraints/s]


In [51]:
#check if drift model has been saved
!ls 

config.yaml  drift_detection_model.tar.gz


In [52]:
upload_response = wos_client.monitor_instances.upload_drift_model(
    model_path="./drift_detection_model.tar.gz",
    data_mart_id=DATA_MART_ID,
    subscription_id=subscription_id
)

if upload_response.get_status_code() == 200:
    print('Successfully uploaded the drift model.')
else:
    print('Something went wrong. Look into this error before running the next section.')
    print('Hint: check that sklearn is in v. 0.20.2 (restart your kernel, make sure to run section 0.2).')

Successfully uploaded the drift model.


In [53]:
#remove the locally saved drift model
!mv drift_detection_model.tar.gz "/project_data/data_asset"

In [54]:
# check if the locally saved drift model is deleted
!ls

config.yaml


# Payload Logging

In [33]:
import time

time.sleep(5)
payload_data_set_id = None
payload_data_set_id = wos_client.data_sets.list(type=DataSetTypes.PAYLOAD_LOGGING, 
                                                target_target_id=subscription_id, 
                                                target_target_type=TargetTypes.SUBSCRIPTION).result.data_sets[0].metadata.id
if payload_data_set_id is None:
    print("Payload data set not found. Please check subscription status.")
else:
    print("Payload data set id: ", payload_data_set_id)

Payload data set id:  434bf30b-8bd1-42bf-96fd-35313a6db94d


In [34]:
wos_client.data_sets.show()

0,1,2,3,4,5,6
00000000-0000-0000-0000-000000000000,active,2dd86d22-d9fe-47e7-9de1-442b640ac58d,subscription,payload_logging,2022-09-12 19:55:49.782000+00:00,434bf30b-8bd1-42bf-96fd-35313a6db94d
00000000-0000-0000-0000-000000000000,active,2dd86d22-d9fe-47e7-9de1-442b640ac58d,subscription,payload_logging_error,2022-09-12 19:55:50.009000+00:00,839e77a7-19d2-40ca-81b6-5e0671a37d3a
00000000-0000-0000-0000-000000000000,active,2dd86d22-d9fe-47e7-9de1-442b640ac58d,subscription,manual_labeling,2022-09-12 19:55:49.896000+00:00,3d466f68-6ae6-4e57-9f90-c4ab18dc7341
00000000-0000-0000-0000-000000000000,active,3ae086ec-53f2-413b-90d9-b86577370aa9,subscription,manual_labeling,2022-08-25 20:33:16.886000+00:00,18293fc2-fa41-4ddf-81e3-88d6a0ee9c61
00000000-0000-0000-0000-000000000000,active,3ae086ec-53f2-413b-90d9-b86577370aa9,subscription,feedback,2022-08-25 20:37:42.851000+00:00,6e0bbbd4-16d7-459a-9d99-73b9ee78e777
00000000-0000-0000-0000-000000000000,active,6da441c0-164a-4d33-a06d-c8ed0a52b453,subscription,payload_logging_error,2022-09-12 05:07:37.178000+00:00,098b2997-c283-4129-a654-f96da4da0b29
00000000-0000-0000-0000-000000000000,active,6da441c0-164a-4d33-a06d-c8ed0a52b453,subscription,training,2022-09-12 05:07:36.912000+00:00,032e4fa0-2186-46a9-bc5b-0f8450c01237
00000000-0000-0000-0000-000000000000,active,6da441c0-164a-4d33-a06d-c8ed0a52b453,subscription,manual_labeling,2022-09-12 05:07:36.728000+00:00,36920a51-5267-46d3-8a0c-78672914b300
00000000-0000-0000-0000-000000000000,active,6da441c0-164a-4d33-a06d-c8ed0a52b453,subscription,payload_logging,2022-09-12 05:07:36.511000+00:00,91fccb01-a5fc-460f-8c21-2e9129ffe347
00000000-0000-0000-0000-000000000000,active,57edd725-bfc3-4f9f-9e5e-b3ba0214838b,subscription,payload_logging_error,2022-09-11 21:26:37.936000+00:00,444a000d-ab0f-4ce5-97c4-57b398588274


Note: First 10 records were displayed.


### Score the model so we can configure monitors

Now that the WML service has been bound and the subscription has been created, we need to send a request to the model before we configure OpenScale. This allows OpenScale to create a payload log in the datamart with the correct schema, so it can capture data coming into and out of the model.

In [36]:
json.loads(train.iloc[: , :-1].to_json(orient="values"))

[[6.7, 2.5, 5.8, 1.8],
 [5.1, 3.7, 1.5, 0.4],
 [5.0, 3.2, 1.2, 0.2],
 [7.2, 3.0, 5.8, 1.6],
 [5.1, 3.8, 1.5, 0.3],
 [6.2, 2.2, 4.5, 1.5],
 [5.1, 3.8, 1.9, 0.4],
 [6.2, 3.4, 5.4, 2.3],
 [6.5, 3.0, 5.2, 2.0],
 [6.4, 2.9, 4.3, 1.3],
 [5.8, 2.7, 3.9, 1.2],
 [6.4, 2.8, 5.6, 2.2],
 [4.7, 3.2, 1.3, 0.2],
 [6.6, 3.0, 4.4, 1.4],
 [4.6, 3.4, 1.4, 0.3],
 [6.7, 3.3, 5.7, 2.1],
 [6.0, 2.2, 4.0, 1.0],
 [4.8, 3.4, 1.6, 0.2],
 [5.0, 3.6, 1.4, 0.2],
 [5.6, 3.0, 4.5, 1.5],
 [6.4, 2.8, 5.6, 2.1],
 [5.4, 3.4, 1.7, 0.2],
 [4.6, 3.2, 1.4, 0.2],
 [5.7, 2.8, 4.1, 1.3],
 [4.7, 3.2, 1.6, 0.2],
 [6.7, 3.1, 5.6, 2.4],
 [6.9, 3.1, 5.1, 2.3],
 [6.5, 3.0, 5.5, 1.8],
 [6.3, 2.7, 4.9, 1.8],
 [6.3, 3.3, 4.7, 1.6],
 [6.9, 3.1, 4.9, 1.5],
 [6.5, 2.8, 4.6, 1.5],
 [6.7, 3.1, 4.4, 1.4],
 [5.3, 3.7, 1.5, 0.2],
 [5.2, 3.5, 1.5, 0.2],
 [5.7, 2.8, 4.5, 1.3],
 [6.1, 2.6, 5.6, 1.4],
 [4.8, 3.0, 1.4, 0.3],
 [4.9, 3.1, 1.5, 0.2],
 [4.9, 3.6, 1.4, 0.1],
 [6.1, 3.0, 4.6, 1.4],
 [5.8, 2.7, 5.1, 1.9],
 [6.9, 3.2, 5.7, 2.3],
 [5.7, 2.9,

In [38]:
fields = ["Sepal.Length","Sepal.Width","Petal.Length","Petal.Width"]

payload_scoring = { "input_data": [ {"fields": fields,"values": json.loads(train.iloc[: , :-1].to_json(orient="values"))}]}
scoring_response = wml_client.deployments.score(d_id, payload_scoring)
scoring_response

{'predictions': [{'fields': ['prediction', 'probability'],
   'values': [['virginica', [0.3218, 0.3239, 0.3543]],
    ['setosa', [0.3563, 0.3219, 0.3218]],
    ['setosa', [0.3563, 0.3219, 0.3218]],
    ['virginica', [0.3196, 0.3329, 0.3475]],
    ['setosa', [0.3563, 0.3219, 0.3218]],
    ['versicolor', [0.3222, 0.3553, 0.3225]],
    ['setosa', [0.3563, 0.3219, 0.3218]],
    ['virginica', [0.3218, 0.3239, 0.3543]],
    ['virginica', [0.3218, 0.3238, 0.3543]],
    ['versicolor', [0.3222, 0.3553, 0.3225]],
    ['versicolor', [0.3222, 0.3553, 0.3225]],
    ['virginica', [0.3218, 0.3239, 0.3543]],
    ['setosa', [0.3563, 0.3219, 0.3218]],
    ['versicolor', [0.3222, 0.3553, 0.3225]],
    ['setosa', [0.3563, 0.3219, 0.3218]],
    ['virginica', [0.3218, 0.3239, 0.3543]],
    ['versicolor', [0.3222, 0.3553, 0.3225]],
    ['setosa', [0.3563, 0.3219, 0.3218]],
    ['setosa', [0.3563, 0.3219, 0.3218]],
    ['versicolor', [0.3222, 0.3553, 0.3225]],
    ['virginica', [0.3218, 0.3239, 0.3543]],
    

## Check if WML payload logging worked else manually store payload records

In [39]:
import uuid
from ibm_watson_openscale.supporting_classes.payload_record import PayloadRecord
time.sleep(5)
pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
print("Number of records in the payload logging table: {}".format(pl_records_count))
if pl_records_count == 0:
    print("Payload logging did not happen, performing explicit payload logging.")
    wos_client.data_sets.store_records(data_set_id=payload_data_set_id, request_body=[PayloadRecord(
                   scoring_id=str(uuid.uuid4()),
                   request=payload_scoring["input_data"][0],
                   response={"fields": scoring_response['predictions'][0]['fields'], "values":scoring_response['predictions'][0]['values']},
                   response_time=460
               )])
    time.sleep(20)
    pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
    print("Number of records in the payload logging table: {}".format(pl_records_count))

Number of records in the payload logging table: 112



# Quality monitoring and feedback logging <a name="quality"></a>

## Enable quality monitoring

The code below waits ten seconds to allow the payload logging table to be set up before it begins enabling monitors. First, it turns on the quality (accuracy) monitor and sets an alert threshold of 70%. OpenScale will show an alert on the dashboard if the model accuracy measurement (area under the curve, in the case of a binary classifier) falls below this threshold.

The second paramater supplied, min_records, specifies the minimum number of feedback records OpenScale needs before it calculates a new measurement. The quality monitor runs hourly, but the accuracy reading in the dashboard will not change until an additional 50 feedback records have been added, via the user interface, the Python client, or the supplied feedback endpoint.

In [55]:
import time

time.sleep(10)
target = Target(
        target_type=TargetTypes.SUBSCRIPTION,
        target_id=subscription_id
)
parameters = {
    "min_feedback_data_size": 10
}
thresholds = [
                {
                    "metric_id": "area_under_roc",
                    "type": "lower_limit",
                    "value": .80
                }
            ]
quality_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.QUALITY.ID,
    target=target,
    parameters=parameters,
    thresholds=thresholds
).result

ApiException: Error: Resource could not be created because of conflict with existing resource `8ba5691e-5e91-4c9f-8494-baca802f2190` of type `monitor_instance`, Code: 409 , X-global-transaction-id: OTVhMTllYjYtYmVhOS00NzIzLWFmNjAtNDM0YmQwNmEyMzFk

In [41]:
quality_monitor_instance_id = quality_monitor_details.metadata.id
quality_monitor_instance_id

'8ba5691e-5e91-4c9f-8494-baca802f2190'

## Feedback logging

The code below downloads and stores enough feedback data to meet the minimum threshold so that OpenScale can calculate a new accuracy measurement. It then kicks off the accuracy monitor. The monitors run hourly, or can be initiated via the Python API, the REST API, or the graphical user interface.

In [None]:
!rm additional_feedback_data_v2.json
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/Cloud%20Pak%20for%20Data/WML/assets/data/credit_risk/additional_feedback_data_v2.json

## Get feedback logging dataset ID

In [None]:
feedback_dataset_id = None
feedback_dataset = wos_client.data_sets.list(type=DataSetTypes.FEEDBACK, 
                                                target_target_id=subscription_id, 
                                                target_target_type=TargetTypes.SUBSCRIPTION).result
print(feedback_dataset)
feedback_dataset_id = feedback_dataset.data_sets[0].metadata.id
if feedback_dataset_id is None:
    print("Feedback data set not found. Please check quality monitor status.")

In [None]:
with open('additional_feedback_data_v2.json') as feedback_file:
    additional_feedback_data = json.load(feedback_file)

In [None]:
wos_client.data_sets.store_records(feedback_dataset_id, request_body=additional_feedback_data, background_mode=False)

In [None]:
wos_client.data_sets.get_records_count(data_set_id=feedback_dataset_id)

In [None]:
run_details = wos_client.monitor_instances.run(monitor_instance_id=quality_monitor_instance_id, background_mode=False).result

In [None]:
wos_client.monitor_instances.show_metrics(monitor_instance_id=quality_monitor_instance_id)

# Fairness, drift monitoring and explanations 
 <a name="fairness"></a>

The code below configures fairness monitoring for our model. It turns on monitoring for two features, Sex and Age. In each case, we must specify:
  * Which model feature to monitor
  * One or more **majority** groups, which are values of that feature that we expect to receive a higher percentage of favorable outcomes
  * One or more **minority** groups, which are values of that feature that we expect to receive a higher percentage of unfavorable outcomes
  * The threshold at which we would like OpenScale to display an alert if the fairness measurement falls below (in this case, 95%)

Additionally, we must specify which outcomes from the model are favourable outcomes, and which are unfavourable. We must also provide the number of records OpenScale will use to calculate the fairness score. In this case, OpenScale's fairness monitor will run hourly, but will not calculate a new fairness rating until at least 200 records have been added. Finally, to calculate fairness, OpenScale must perform some calculations on the training data, so we provide the dataframe containing the data.

In [42]:
wos_client.monitor_instances.show()

0,1,2,3,4,5,6
00000000-0000-0000-0000-000000000000,active,2dd86d22-d9fe-47e7-9de1-442b640ac58d,subscription,quality,2022-09-12 20:02:56.758000+00:00,8ba5691e-5e91-4c9f-8494-baca802f2190
00000000-0000-0000-0000-000000000000,active,b2ef444f-1bad-4464-926f-ab9a1a25992b,subscription,fairness,2022-08-08 20:59:26.313000+00:00,5c669fbb-c4b2-4fc7-b155-feb0071ce8d8
00000000-0000-0000-0000-000000000000,active,3ae086ec-53f2-413b-90d9-b86577370aa9,subscription,statistical_parity_differences,2022-08-30 20:54:32.848000+00:00,1e752fbc-5140-4ddd-84ef-aa98958ce430
00000000-0000-0000-0000-000000000000,active,3ae086ec-53f2-413b-90d9-b86577370aa9,subscription,fairness,2022-08-25 20:39:33.117000+00:00,07306eb9-dede-4f42-a705-318f90182320
00000000-0000-0000-0000-000000000000,active,b2ef444f-1bad-4464-926f-ab9a1a25992b,subscription,drift,2022-08-08 20:59:39.881000+00:00,e0181aa1-d394-463b-b502-60f546b3b57f
00000000-0000-0000-0000-000000000000,active,3201b76e-bf73-4153-9d32-2dfb056dea7f,subscription,drift,2022-09-05 17:46:52.050000+00:00,b5020262-e23e-4453-9f84-d572d8ad2061
00000000-0000-0000-0000-000000000000,active,d1a4f312-f47e-4034-89fb-09669c874be8,subscription,fairness,2022-08-22 17:41:54.601000+00:00,8c4e9f8d-1996-402f-96f0-c83a2370c42a
00000000-0000-0000-0000-000000000000,active,3ae086ec-53f2-413b-90d9-b86577370aa9,subscription,drift,2022-08-25 20:43:14.566000+00:00,f70a59c1-707e-4ece-bcee-06566c6389fa
00000000-0000-0000-0000-000000000000,active,6da441c0-164a-4d33-a06d-c8ed0a52b453,subscription,explainability,2022-09-12 05:07:46.284000+00:00,310425e5-61d1-4738-b851-ce7f287728b5
00000000-0000-0000-0000-000000000000,active,6da441c0-164a-4d33-a06d-c8ed0a52b453,subscription,drift,2022-09-12 05:07:59.291000+00:00,d5500632-e378-4a6f-800b-b692db3b3660


Note: First 10 records were displayed.


In [47]:
target = Target(
    target_type=TargetTypes.SUBSCRIPTION,
    target_id=subscription_id

)
parameters = {
    "features": [
        {"feature": "Sepal.Length",
         "majority": [[1, 2]],
         "minority": [[3, 4], [5,6]],
         "threshold": 0.95
         }
    ],
    "favourable_class": ["setosa"],
    "unfavourable_class": ["versicolor", "virginica"],
    "min_records": 100
}

fairness_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.FAIRNESS.ID,
    target=target,
    parameters=parameters).result
fairness_monitor_instance_id =fairness_monitor_details.metadata.id
fairness_monitor_instance_id

ApiException: Error: Resource could not be created because of conflict with existing resource `7003b7c7-7152-4ad0-bdce-4ea4081ff9eb` of type `monitor_instance`, Code: 409 , X-global-transaction-id: NmViZjViNzktNGQ0MS00MzBiLWI3NTgtMGFmNDA3ZWJlZmYx

## Drift configuration

In [43]:
monitor_instances = wos_client.monitor_instances.list().result.monitor_instances
for monitor_instance in monitor_instances:
    monitor_def_id=monitor_instance.entity.monitor_definition_id
    if monitor_def_id == "drift" and monitor_instance.entity.target.target_id == subscription_id:
        wos_client.monitor_instances.delete(monitor_instance.metadata.id)
        print('Deleted existing drift monitor instance with id: ', monitor_instance.metadata.id)

In [44]:
target = Target(
    target_type=TargetTypes.SUBSCRIPTION,
    target_id=subscription_id

)
parameters = {
    "min_samples": 100,
    "drift_threshold": 0.1,
    "train_drift_model": True,
    "enable_model_drift": False,
    "enable_data_drift": True
}

drift_monitor_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.DRIFT.ID,
    target=target,
    parameters=parameters
).result

drift_monitor_instance_id = drift_monitor_details.metadata.id
drift_monitor_instance_id




 Waiting for end of monitor instance creation b7526797-b323-46e9-b494-16561a157a61 




error

-----------------------------------------------------
 Monitor instance creation failed with status: error 
-----------------------------------------------------


Reason: ['code: AIQDD6004E, message: Unable to read the train_xgb.csv training data file in the cs-xgb-bucket COS bucket.']


'b7526797-b323-46e9-b494-16561a157a61'

## Score the model again now that monitoring is configured

This next section randomly selects 200 records from the data feed and sends those records to the model for predictions. This is enough to exceed the minimum threshold for records set in the previous section, which allows OpenScale to begin calculating fairness.

In [None]:
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/Cloud%20Pak%20for%20Data/WML/assets/data/credit_risk/german_credit_feed.json
!ls -lh german_credit_feed.json

Score 200 randomly chosen records

In [None]:
import random

with open('german_credit_feed.json', 'r') as scoring_file:
    scoring_data = json.load(scoring_file)

fields = scoring_data['fields']
values = []
for _ in range(200):
    values.append(random.choice(scoring_data['values']))
payload_scoring = {"input_data": [{"fields": fields, "values": values}]}

scoring_response = wml_client.deployments.score(ai_func_deployment_uid, payload_scoring)
time.sleep(5)

if pl_records_count == 8:
    print("Payload logging did not happen, performing explicit payload logging.")
    wos_client.data_sets.store_records(data_set_id=payload_data_set_id, request_body=[PayloadRecord(
                   scoring_id=str(uuid.uuid4()),
                   request=payload_scoring,
                   response=scoring_response,
                   response_time=460
               )])
    time.sleep(5)
    pl_records_count = wos_client.data_sets.get_records_count(payload_data_set_id)
    print("Number of records in the payload logging table: {}".format(pl_records_count))

In [None]:
print('Number of records in payload table: ', wos_client.data_sets.get_records_count(data_set_id=payload_data_set_id))

## Run fairness monitor

Kick off a fairness monitor run on current data. The monitor runs hourly, but can be manually initiated using the Python client, the REST API, or the graphical user interface.

In [None]:
time.sleep(5)
run_details = wos_client.monitor_instances.run(monitor_instance_id=fairness_monitor_instance_id, background_mode=False)

In [None]:
time.sleep(10)
wos_client.monitor_instances.show_metrics(monitor_instance_id=fairness_monitor_instance_id)

## Run drift monitor

Kick off a drift monitor run on current data. The monitor runs every hour, but can be manually initiated using the Python client, the REST API.

In [None]:
drift_run_details = wos_client.monitor_instances.run(monitor_instance_id=drift_monitor_instance_id, background_mode=False)

In [None]:
time.sleep(5)
wos_client.monitor_instances.show_metrics(monitor_instance_id=drift_monitor_instance_id)

## Configure Explainability

Finally, we provide OpenScale with the training data to enable and configure the explainability features.

In [48]:
target = Target(
    target_type=TargetTypes.SUBSCRIPTION,
    target_id=subscription_id
)
parameters = {
    "enabled": True
}
explainability_details = wos_client.monitor_instances.create(
    data_mart_id=data_mart_id,
    background_mode=False,
    monitor_definition_id=wos_client.monitor_definitions.MONITORS.EXPLAINABILITY.ID,
    target=target,
    parameters=parameters
).result

explainability_monitor_id = explainability_details.metadata.id




 Waiting for end of monitor instance creation 9fee57b4-d3b0-47ef-80ee-bbb9b5879ed6 




active

---------------------------------------
 Monitor instance successfully created 
---------------------------------------




## Run explanation for sample record

In [49]:
pl_records_resp = wos_client.data_sets.get_list_of_records(data_set_id=payload_data_set_id, limit=1, offset=0).result
scoring_ids = [pl_records_resp["records"][0]["entity"]["values"]["scoring_id"]]
print("Running explanations on scoring IDs: {}".format(scoring_ids))
explanation_types = ["lime", "contrastive"]
result = wos_client.monitor_instances.explanation_tasks(scoring_ids=scoring_ids, explanation_types=explanation_types).result
print(result)

Running explanations on scoring IDs: ['291863422122685c0cc8cf0b5b32af61-1']
{
  "metadata": {
    "explanation_task_ids": [
      "9ae84790-8fc7-49a9-88b5-f47acd11c0a8"
    ],
    "created_by": "1000331030",
    "created_at": "2022-09-12T20:07:01.436229Z"
  }
}


# Custom monitors and metrics <a name="custom"></a>

## Register custom monitor

In [None]:
def get_definition(monitor_name):
    monitor_definitions = wos_client.monitor_definitions.list().result.monitor_definitions
    
    for definition in monitor_definitions:
        if monitor_name == definition.entity.name:
            return definition
    
    return None

In [None]:
monitor_name = 'my model performance'
metrics = [MonitorMetricRequest(name='sensitivity',
                                thresholds=[MetricThreshold(type=MetricThresholdTypes.LOWER_LIMIT, default=0.8)]),
          MonitorMetricRequest(name='specificity',
                                thresholds=[MetricThreshold(type=MetricThresholdTypes.LOWER_LIMIT, default=0.75)])]
tags = [MonitorTagRequest(name='region', description='customer geographical region')]

existing_definition = get_definition(monitor_name)

if existing_definition is None:
    custom_monitor_details = wos_client.monitor_definitions.add(name=monitor_name, metrics=metrics, tags=tags, background_mode=False).result
else:
    custom_monitor_details = existing_definition

## Show available monitors types

In [None]:
wos_client.monitor_definitions.show()

### Get monitors uids and details

In [None]:
custom_monitor_id = custom_monitor_details.metadata.id

print(custom_monitor_id)

In [None]:
custom_monitor_details = wos_client.monitor_definitions.get(monitor_definition_id=custom_monitor_id).result
print('Monitor definition details:', custom_monitor_details)

## Enable custom monitor for subscription

In [None]:
target = Target(
        target_type=TargetTypes.SUBSCRIPTION,
        target_id=subscription_id
    )

thresholds = [MetricThresholdOverride(metric_id='sensitivity', type = MetricThresholdTypes.LOWER_LIMIT, value=0.9)]

custom_monitor_instance_details = wos_client.monitor_instances.create(
            data_mart_id=data_mart_id,
            background_mode=False,
            monitor_definition_id=custom_monitor_id,
            target=target,
            thresholds=thresholds
).result

### Get monitor instance id and configuration details

In [None]:
custom_monitor_instance_id = custom_monitor_instance_details.metadata.id

In [None]:
custom_monitor_instance_details = wos_client.monitor_instances.get(custom_monitor_instance_id).result
print(custom_monitor_instance_details)

## Storing custom metrics

In [None]:
from datetime import datetime, timezone, timedelta
from ibm_watson_openscale.base_classes.watson_open_scale_v2 import MonitorMeasurementRequest
custom_monitoring_run_id = "11122223333111abc"
measurement_request = [MonitorMeasurementRequest(timestamp=datetime.now(timezone.utc), 
                                                 metrics=[{"specificity": 0.78, "sensitivity": 0.67, "region": "us-south"}], run_id=custom_monitoring_run_id)]
print(measurement_request[0])

In [None]:
published_measurement_response = wos_client.monitor_instances.measurements.add(
    monitor_instance_id=custom_monitor_instance_id,
    monitor_measurement_request=measurement_request).result
published_measurement_id = published_measurement_response[0]["measurement_id"]
print(published_measurement_response)

### List and get custom metrics

In [None]:
time.sleep(5)
published_measurement = wos_client.monitor_instances.measurements.get(monitor_instance_id=custom_monitor_instance_id, measurement_id=published_measurement_id).result
print(published_measurement)

# Historical data <a name="historical"></a>

In [None]:
historyDays = 7

 ## Insert historical payloads

The next section of the notebook downloads and writes historical data to the payload and measurement tables to simulate a production model that has been monitored and receiving regular traffic for the last seven days. This historical data can be viewed in the Watson OpenScale user interface. The code uses the Python and REST APIs to write this data.

In [None]:
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_fairness_v2.json
!ls -lh history_fairness_v2.json

In [None]:
from datetime import datetime, timedelta, timezone

with open('history_fairness_v2.json', 'r') as history_file:
    payloads = json.load(history_file)

for day in range(historyDays):
    print('Loading day', day + 1)
    daily_measurement_requests = []
    
    for hour in range(24):
        score_time = datetime.now(timezone.utc) + timedelta(hours=(-(24*day + hour + 1)))
        index = (day * 24 + hour) % len(payloads) # wrap around and reuse values if needed
 
        measurement_request = MonitorMeasurementRequest(timestamp=score_time,metrics = [payloads[index][0], payloads[index][1]])
        daily_measurement_requests.append(measurement_request)
        
        
    response = wos_client.monitor_instances.measurements.add(
                                            monitor_instance_id=fairness_monitor_instance_id,
                                            monitor_measurement_request=daily_measurement_requests).result     
print('Finished')

## Insert historical debias metrics

In [None]:
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_debias_v2.json
!ls -lh history_debias_v2.json

In [None]:
with open('history_debias_v2.json', 'r') as history_file:
    payloads = json.load(history_file)

for day in range(historyDays):
    print('Loading day', day + 1)
    daily_measurement_requests = []
    for hour in range(24):
        score_time = datetime.now(timezone.utc) + timedelta(hours=(-(24*day + hour + 1)))
        index = (day * 24 + hour) % len(payloads) # wrap around and reuse values if needed

        measurement_request = MonitorMeasurementRequest(timestamp=score_time,metrics = [payloads[index][0], payloads[index][1]])
        
        daily_measurement_requests.append(measurement_request)
        
    response = wos_client.monitor_instances.measurements.add(
                                            monitor_instance_id=fairness_monitor_instance_id,
                                            monitor_measurement_request=daily_measurement_requests).result     

print('Finished')

## Insert historical quality metrics

In [None]:
measurements = [0.76, 0.78, 0.68, 0.72, 0.73, 0.77, 0.80]
for day in range(historyDays):
    quality_measurement_requests = []
    print('Loading day', day + 1)
    for hour in range(24):
        score_time = datetime.utcnow() + timedelta(hours=(-(24*day + hour + 1)))
        score_time = score_time.isoformat() + "Z"
        
        metric = {"area_under_roc": measurements[day]}
                
        measurement_request = MonitorMeasurementRequest(timestamp=score_time,metrics = [metric])
        quality_measurement_requests.append(measurement_request)
        
        
    response = wos_client.monitor_instances.measurements.add(
                                            monitor_instance_id=quality_monitor_instance_id,
                                            monitor_measurement_request=quality_measurement_requests).result    
    
print('Finished')

## Insert historical confusion matrixes

In [None]:
!rm history_quality_metrics.json
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_quality_metrics.json
!ls -lh history_quality_metrics.json

In [None]:
from ibm_watson_openscale.base_classes.watson_open_scale_v2 import Source

with open('history_quality_metrics.json') as json_file:
    records = json.load(json_file)
    
for day in range(historyDays):
    index = 0
    cm_measurement_requests = []
    print('Loading day', day + 1)
    
    for hour in range(24):
        score_time = datetime.utcnow() + timedelta(hours=(-(24*day + hour + 1)))
        score_time = score_time.isoformat() + "Z"

        metric = records[index]['metrics']
        source = records[index]['sources']

        
        measurement_request = {"timestamp": score_time, "metrics": [metric], "sources": [source]}
        cm_measurement_requests.append(measurement_request)

        index+=1

    response = wos_client.monitor_instances.measurements.add(monitor_instance_id=quality_monitor_instance_id, monitor_measurement_request=cm_measurement_requests).result    

print('Finished')

## Insert historical performance metrics

In [None]:
target = Target(
        target_type=TargetTypes.INSTANCE,
        target_id=payload_data_set_id
    )


performance_monitor_instance_details = wos_client.monitor_instances.create(
            data_mart_id=data_mart_id,
            background_mode=False,
            monitor_definition_id=wos_client.monitor_definitions.MONITORS.PERFORMANCE.ID,
            target=target
).result
performance_monitor_instance_id = performance_monitor_instance_details.metadata.id

In [None]:
for day in range(historyDays):
    performance_measurement_requests = []
    print('Loading day', day + 1)
    for hour in range(24):
        score_time = datetime.utcnow() + timedelta(hours=(-(24*day + hour + 1)))
        score_time = score_time.isoformat() + "Z"
        score_count = random.randint(60, 600)
        
        metric = {"record_count": score_count, "data_set_type": "scoring_payload"}
        
        measurement_request = {"timestamp": score_time, "metrics": [metric]}
        performance_measurement_requests.append(measurement_request)
        
    response = wos_client.monitor_instances.measurements.add(
                                            monitor_instance_id=performance_monitor_instance_id,
                                            monitor_measurement_request=performance_measurement_requests).result    

print('Finished')

## Insert historical drift measurements

In [None]:
!rm history_drift_measurement_*.json
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_drift_measurement_0.json
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_drift_measurement_1.json
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_drift_measurement_2.json
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_drift_measurement_3.json
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_drift_measurement_4.json
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_drift_measurement_5.json
!wget https://raw.githubusercontent.com/IBM/watson-openscale-samples/main/IBM%20Cloud/WML/assets/data/historical_data/credit_risk/history_drift_measurement_6.json
!ls -lh history_drift_measurement_*.json

In [None]:
for day in range(historyDays):
    drift_measurements = []

    with open("history_drift_measurement_{}.json".format(day), 'r') as history_file:
        drift_daily_measurements = json.load(history_file)
    print('Loading day', day + 1)

    #Historical data contains 8 records per day - each represents 3 hour drift window.
    
    for nb_window, records in enumerate(drift_daily_measurements):
        for record in records:
            window_start =  datetime.utcnow() + timedelta(hours=(-(24 * day + (nb_window+1)*3 + 1))) # first_payload_record_timestamp_in_window (oldest)
            window_end = datetime.utcnow() + timedelta(hours=(-(24 * day + nb_window*3 + 1)))# last_payload_record_timestamp_in_window (most recent)
            #modify start and end time for each record
            record['sources'][0]['data']['start'] = window_start.isoformat() + "Z"
            record['sources'][0]['data']['end'] = window_end.isoformat() + "Z"
            
            
            metric = record['metrics'][0]
            source = record['sources'][0]

            measurement_request = {"timestamp": window_start.isoformat() + "Z", "metrics": [metric], "sources": [source]}
            
            drift_measurements.append(measurement_request)
        
    response = wos_client.monitor_instances.measurements.add(
                                            monitor_instance_id=drift_monitor_instance_id,
                                            monitor_measurement_request=drift_measurements).result    

    
    print("Daily loading finished.")

## Additional data to help debugging

In [None]:
print('Datamart:', data_mart_id)
print('Model:', model_uid)
print('Deployment:', ai_func_deployment_uid)

## Identify transactions for Explainability

Transaction IDs identified by the cells below can be copied and pasted into the Explainability tab of the OpenScale dashboard.

In [None]:
wos_client.data_sets.show_records(payload_data_set_id, limit=5)

## Congratulations!

You have finished the hands-on lab for IBM Watson OpenScale. You can now view the OpenScale Dashboard: (https://url-to-your-cp4d-cluster/aiopenscale). Click on the tile for the German Credit model to see fairness, accuracy, and performance monitors. Click on the timeseries graph to get detailed information on transactions during a specific time window.
