In [1]:
# Install the latest version of crossdeploy
! pip install --upgrade crossdeploy

# Node is installed by default in Watson Studio, JupyterLab and JupyterNotebook environments. 
# However, in case your environment does not have node installed, it can be installed with the below command.
# ! pip install "nodejs-bin[cmd]"



# Example 3: Build, promote, deploy and monitor a model

In this example, we will build, promote, deploy and monitor a simple scikit-learn pipeline model.

# Imports

In [2]:
import dataclasses
import io
import os

import pandas as pd
import requests

from crossdeploy.utils import utils
from crossdeploy.crossdeploy import CrossDeploy

from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_selector, make_column_transformer

import ibm_watson_openscale
import ibm_watson_machine_learning
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator

# Define variables

In [3]:
# Secrets are read from environment variables so that real keys are never saved in the notebook.
# Export CPD_API_KEY and COS_API_KEY before starting the kernel; "xxx" is only a placeholder.
CPD_API_KEY = os.environ.get("CPD_API_KEY", "xxx") # or CPD_PASSWORD = "xxx"
CPD_USERNAME = "admin"
CPD_URL = "https://cpd-cpd.itzroks-550003aw18-bwcq3a-6ccd7f378ae819553d37d5f2ee142bd6-0000.au-syd.containers.appdomain.cloud"
COS_API_KEY = os.environ.get("COS_API_KEY", "xxx")
PROJECT_ID = "75c938f9-cc5f-45a9-b44f-3f1b51a69516"
SPACE_ID = "c80c0f20-ce3e-45fe-a0ae-e8012abffd81"

# Credentials for the Watson Machine Learning client.
wml_credentials = {
    "username": CPD_USERNAME,
    "apikey": CPD_API_KEY,
    "url": CPD_URL,
    "instance_id": "openshift",
    "version": "4.6",
}
wml_client = ibm_watson_machine_learning.APIClient(wml_credentials)

# Watson OpenScale client, authenticated against the same Cloud Pak for Data instance.
wos_client = ibm_watson_openscale.APIClient(
    service_url=CPD_URL,
    authenticator=CloudPakForDataAuthenticator(
        url=CPD_URL,
        username=CPD_USERNAME,
        apikey=CPD_API_KEY,
        # NOTE(review): TLS certificate verification is switched off here; enable it
        # when the cluster presents a certificate trusted by this environment.
        disable_ssl_verification=True,
    ),
)

# Ensure that you have created a data mart in Watson OpenScale

In [4]:
# List the data marts known to this Watson OpenScale instance.
wos_client.data_marts.show()
# Optional inspection calls for other OpenScale resources; uncomment as needed.
# wos_client.service_providers.show()
# wos_client.subscriptions.show()
# wos_client.monitor_instances.show()
# wos_client.monitor_instances.get("96efa40a-5175-440e-b138-3d83a2ff195c").result._to_dict()

0,1,2,3,4,5
,,False,active,2023-01-11 09:33:10.742000+00:00,00000000-0000-0000-0000-000000000000


# Define DATA_MART_ID

In [5]:
# ID of the data mart to use; this value matches the last column of the data_marts.show() output above.
DATA_MART_ID = "00000000-0000-0000-0000-000000000000"

# Save, promote, deploy and monitor model

## Model monitoring template

The template performs the following steps:

- storing a model in a project
- promoting the model to a deployment space
- creating a deployment
- creating a service provider
- creating a subscription
- creating monitors such as quality, drift, fairness and explainability

Any update or modification to the `config` dictionary will result in the appropriate action (such as update, or delete and create) for the respective resources.

In [6]:
class ModelMonitor:
    """Declare the CrossDeploy resources to store, promote, deploy and monitor a model.

    The constructor registers resources on a ``CrossDeploy`` flow; ``apply()`` and
    ``destroy()`` delegate to the flow's methods of the same name.

    Keys read from ``config``:

    - ``provider`` (required): keyword arguments for ``flow.ibm.Provider``. Its
      ``username``, ``url`` and ``api_key`` entries are reused for the service provider.
    - ``model`` (required): ``model`` (the object handed to ``flow.ibm.Model``),
      ``name`` and ``project_id``.
    - ``deployment`` (required): ``name``, ``space_id`` and ``online``.
    - ``service_provider`` (optional): ``name``, ``service_type`` and
      ``operational_space_id``.
    - ``subscription`` (optional, only read when ``service_provider`` is present):
      ``name``, ``data_mart_id``, ``asset_type``, ``input_data_type``,
      ``problem_type`` and ``asset_properties``. When it is present,
      ``cos_data_reference`` is required as well.
    - ``monitors`` (optional, only read when ``subscription`` is present): any of
      ``quality``, ``drift``, ``fairness`` and ``explainability``. An MRM monitor
      is always added when this key is present.

    NOTE(review): the subscription and the feedback record read the notebook-level
    globals ``X`` and ``df``; both must be defined before this class is instantiated
    with a ``subscription`` (and, for ``df``, a ``quality`` monitor).

    Attributes:
        flow: the ``CrossDeploy`` flow holding all declared resources.
        mrm_monitor: the MRM monitor instance, or ``None`` when ``config`` does not
            reach the ``monitors`` section.
    """

    def __init__(self, config):
        """Register the resources described by ``config`` (see the class docstring)."""
        provider_cfg = config["provider"]
        model_cfg = config["model"]
        deployment_cfg = config["deployment"]

        flow = CrossDeploy()
        self.flow = flow
        # Always define the attribute so callers get None instead of an
        # AttributeError when no monitors are configured.
        self.mrm_monitor = None

        flow.ibm.Provider(**provider_cfg)

        model_rf = flow.ibm.Model(model_cfg["model"])(
            id="mortgage-model-1",
            name=model_cfg["name"],
            project_id=model_cfg["project_id"],
        )

        promoted_model_rf = flow.ibm.PromotedModel(model_rf)(
            id="promoted-model-1",
            project_id=model_cfg["project_id"],
            space_id=deployment_cfg["space_id"],
            asset_id=model_rf.id,
        )

        deployment = flow.ibm.Deployment(
            id="deployment-1",
            name=deployment_cfg["name"],
            space_id=deployment_cfg["space_id"],
            asset=promoted_model_rf.id,
            online=deployment_cfg["online"],
        )

        # Everything below is optional and strictly layered:
        # service provider -> subscription -> monitors.
        if "service_provider" not in config:
            return

        service_provider_cfg = config["service_provider"]
        service_provider = flow.ibm.ServiceProvider(
            id="service-provider-1",
            name=service_provider_cfg["name"],
            service_type=service_provider_cfg["service_type"],
            operational_space_id=service_provider_cfg["operational_space_id"],
            deployment_space_id=deployment_cfg["space_id"],
            user_name=provider_cfg["username"],
            url=provider_cfg["url"],
            api_key=provider_cfg["api_key"],
        )

        if "subscription" not in config:
            return

        subscription_cfg = config["subscription"]
        cos_data_reference = config["cos_data_reference"]
        subscription = flow.ibm.Subscription(
            id="subscription-1",
            name=subscription_cfg["name"],
            data_mart_id=subscription_cfg["data_mart_id"],
            service_provider_id=service_provider.id,
            asset=flow.ibm.SubscriptionAsset(
                asset_id=promoted_model_rf.id,
                asset_type=subscription_cfg["asset_type"],
                input_data_type=subscription_cfg["input_data_type"],
                problem_type=subscription_cfg["problem_type"],
                url=deployment.url,
            ),
            deployment=flow.ibm.SubscriptionDeployment(
                deployment_id=deployment.id,
                deployment_type="online" if deployment_cfg["online"] else None,
                deployment_url=deployment.url,
            ),
            asset_properties=flow.ibm.SubscriptionAssetProperties(**subscription_cfg["asset_properties"]),
            # X is the notebook-level feature frame (see the class docstring).
            training_data_schema=flow.ibm.get_training_data_schema(utils.get_training_data_schema(X)),
            training_data_reference=flow.ibm.SubscriptionTrainingDataReference(**cos_data_reference),
            payload_file=utils.export_payload_data(X),
        )

        if "monitors" not in config:
            return

        monitors_cfg = config["monitors"]
        # Monitors are attached to the subscription, so they use the subscription's
        # data mart from config rather than a notebook-level global.
        data_mart_id = subscription_cfg["data_mart_id"]

        if "quality" in monitors_cfg:
            quality_cfg = monitors_cfg["quality"]
            quality_monitor = flow.ibm.MonitorInstance(
                id="quality_monitor",
                data_mart_id=data_mart_id,
                monitor_definition_id="quality",
                subscription_id=subscription.id,
                parameters=flow.ibm.MonitorInstanceParameters(
                    min_feedback_data_size=quality_cfg["min_feedback_data_size"],
                ),
                thresholds=flow.ibm.get_monitor_instance_thresholds(quality_cfg["thresholds"]),
            )
            # The feedback record is declared with an explicit dependency on the quality monitor.
            flow.ibm.Record(
                id="feedback_record",
                subscription_id=subscription.id,
                type="feedback",
                # df is the notebook-level labelled frame (see the class docstring).
                file_path=utils.export_feedback_data(df),
                depends_on=[quality_monitor],
            )

        if "drift" in monitors_cfg:
            flow.ibm.MonitorInstance(
                id="drift_monitor",
                data_mart_id=data_mart_id,
                monitor_definition_id="drift",
                subscription_id=subscription.id,
                parameters=flow.ibm.MonitorInstanceParameters(**monitors_cfg["drift"]["parameters"]),
            )

        if "fairness" in monitors_cfg:
            fairness_cfg = monitors_cfg["fairness"]
            flow.ibm.MonitorInstance(
                id="fairness_monitor",
                data_mart_id=data_mart_id,
                monitor_definition_id="fairness",
                subscription_id=subscription.id,
                parameters=flow.ibm.MonitorInstanceParameters(
                    favourable_class=fairness_cfg["favourable_class"],
                    unfavourable_class=fairness_cfg["unfavourable_class"],
                    min_records=fairness_cfg["min_records"],
                    features=flow.ibm.get_monitor_instance_parameters_features(fairness_cfg["features"]),
                ),
                thresholds=flow.ibm.get_monitor_instance_thresholds(fairness_cfg["thresholds"]),
            )

        if "explainability" in monitors_cfg:
            explainability_cfg = monitors_cfg["explainability"]
            # Honour the configured flag; default to enabled when it is not given.
            if isinstance(explainability_cfg, dict):
                explainability_enabled = explainability_cfg.get("enabled", True)
            else:
                explainability_enabled = True
            flow.ibm.MonitorInstance(
                id="explainability_monitor",
                data_mart_id=data_mart_id,
                monitor_definition_id="explainability",
                subscription_id=subscription.id,
                parameters=flow.ibm.MonitorInstanceParameters(
                    enabled=explainability_enabled,
                ),
            )

        # The MRM monitor is added whenever a "monitors" section exists and is
        # kept on the instance so that evaluations can be triggered later.
        self.mrm_monitor = flow.ibm.MonitorInstance(
            id="mrm_monitor",
            data_mart_id=data_mart_id,
            monitor_definition_id="mrm",
            subscription_id=subscription.id,
        )

    def apply(self):
        """Apply the declared resources by delegating to ``flow.apply()``."""
        self.flow.apply()

    def destroy(self):
        """Destroy the declared resources by delegating to ``flow.destroy()``."""
        self.flow.destroy()


## Import data and preprocessing step

In [7]:
label_column = "MortgageDefault"

# Load the mortgage-default data set and separate the features from the label.
df = pd.read_csv(
    "https://raw.githubusercontent.com/crossdeploy-io/crossdeploy-examples/main/data/mortgage-default.csv"
)
X = df.drop(columns=[label_column])
y = df[label_column]

# One-hot encode the object-typed columns; all other columns pass through unchanged.
categorical_selector = make_column_selector(dtype_include=object)
ct = make_column_transformer(
    (OneHotEncoder(), categorical_selector),
    remainder="passthrough",
)

## Build model

In [8]:
# Preprocessing followed by a small random forest, fitted on the full data set.
model = Pipeline(
    steps=[
        ("transform", ct),
        ("clf", RandomForestClassifier(random_state=12345, max_depth=30, n_estimators=3)),
    ]
).fit(X, y)

# Scored on the same data the model was fitted on, so this is training accuracy.
model.score(X, y)

0.9618138424821002

## Model monitoring configuration

In [12]:
# Names given to the resources created by this example.
MODEL_NAME = "mortgage-model-rf"
DEPLOYMENT_NAME = "mortgage-model-rf-deployment"
SUBSCRIPTION_NAME = "mortgage-model-rf-subscription"

# Connection details for the Cloud Pak for Data provider.
PROVIDER_CONFIG = dict(
    url=CPD_URL,
    username=CPD_USERNAME,
    api_key=CPD_API_KEY,
)

# The fitted pipeline and the project it is stored in.
MODEL_CONFIG = dict(
    name=MODEL_NAME,
    model=model,
    project_id=PROJECT_ID,
)

# Online deployment in the target deployment space.
DEPLOYMENT_CONFIG = dict(
    name=DEPLOYMENT_NAME,
    space_id=SPACE_ID,
    online=True,
)

SERVICE_PROVIDER = dict(
    name="WML - Dev",
    service_type="watson_machine_learning",
    operational_space_id="pre_production",
)

# Subscription settings; the field lists are derived from the feature frame X.
SUBSCRIPTION = dict(
    name=SUBSCRIPTION_NAME,
    data_mart_id=DATA_MART_ID,
    asset_type="model",
    input_data_type="structured",
    problem_type="binary",
    asset_properties=dict(
        categorical_fields=X.select_dtypes(include=object).columns.tolist(),
        feature_fields=X.columns.tolist(),
        label_column=label_column,
        prediction_field="prediction",
        probability_fields=["probability"],
    ),
)

# Location of the training data in Cloud Object Storage.
COS_DATA_REFERENCE = dict(
    type="cos",
    bucket_name="data-rp",
    file_name="mortgage-default.csv",
    resource_instance_id="2bc86622-194d-4728-b280-7bd9fcfbdf80",
    cos_api_key=COS_API_KEY,
    cos_url="https://s3.us.cloud-object-storage.appdomain.cloud",
    iam_url="https://iam.bluemix.net/oidc/token",
)

# One entry per monitor to create.
MONITORS_CONFIG = dict(
    quality=dict(
        min_feedback_data_size=100,
        thresholds=[
            dict(metric_id="area_under_roc", type="lower_limit", value=0.8),
        ],
    ),
    drift=dict(
        parameters=dict(
            min_samples=100,
            drift_threshold=0.1,
            train_drift_model=True,
            enable_model_drift=True,
            enable_data_drift=True,
        ),
    ),
    fairness=dict(
        favourable_class=["NO"],
        unfavourable_class=["YES"],
        min_records=100,
        features=[
            dict(feature="AppliedOnline", majority=["YES"], minority=["NO"], threshold=0.9),
        ],
        thresholds=[
            dict(metric_id="fairness_value", type="lower_limit", value=0.9),
        ],
    ),
    explainability=dict(
        enabled=True,
    ),
)

# Full configuration consumed by ModelMonitor.
config = dict(
    provider=PROVIDER_CONFIG,
    model=MODEL_CONFIG,
    deployment=DEPLOYMENT_CONFIG,
    service_provider=SERVICE_PROVIDER,
    subscription=SUBSCRIPTION,
    cos_data_reference=COS_DATA_REFERENCE,
    monitors=MONITORS_CONFIG,
)

# Declare the resources described by `config`, then apply them.
model_monitor = ModelMonitor(config)
model_monitor.apply()

Applying ...

[0m[1mInitializing the backend...[0m

[0m[1mInitializing provider plugins...[0m
- Finding randyphoa/ibmcpd versions matching "0.1.14"...
- Installing randyphoa/ibmcpd v0.1.14...
- Installed randyphoa/ibmcpd v0.1.14 (self-signed, key ID [0m[1m0C4FF2867393F57F[0m[0m)

Partner and community providers are signed by their developers.
If you'd like to know more about provider signing, you can read about it here:
https://www.terraform.io/docs/cli/plugins/signing.html

[0m[1m[32mTerraform has been successfully initialized![0m[32m[0m

Terraform used the selected providers to generate the following execution
plan. Resource actions are indicated with the following symbols:
  [32m+[0m create
[0m
Terraform will perform the following actions:

[1m  # ibmcpd_deployment.crossdeploy_deployment1_E539DCE6[0m will be created[0m[0m
[0m  [32m+[0m[0m resource "ibmcpd_deployment" "crossdeploy_deployment1_E539DCE6" {
      [32m+[0m [0m[1m[0masset[0m[0m    = (know

# Check that everything has been set up without errors

In [13]:
# Display the subscriptions and monitor instances registered in Watson OpenScale.
wos_client.subscriptions.show()
wos_client.monitor_instances.show()

0,1,2,3,4,5,6,7,8
3ad9b3e5-f372-4751-be26-cbd6eb4d8fed,,00000000-0000-0000-0000-000000000000,874fb24f-6291-4a6a-9cc6-f5cab22fbeaf,mortgage-model-rf-subscription,f131298f-be64-471e-88f4-55c9488c8a82,active,2023-01-16 05:58:53.771000+00:00,b6054e20-bcb5-445b-9e8d-3b872d88f3b4


0,1,2,3,4,5,6
00000000-0000-0000-0000-000000000000,active,b6054e20-bcb5-445b-9e8d-3b872d88f3b4,subscription,drift,2023-01-16 06:02:48.140000+00:00,d91d4ef4-4fda-4970-8c65-8ccf33f63db9
00000000-0000-0000-0000-000000000000,active,b6054e20-bcb5-445b-9e8d-3b872d88f3b4,subscription,explainability,2023-01-16 06:02:48.134000+00:00,af0ab062-a662-4e65-9a16-5b56c94a678c
00000000-0000-0000-0000-000000000000,active,b6054e20-bcb5-445b-9e8d-3b872d88f3b4,subscription,fairness,2023-01-16 06:02:48.163000+00:00,57b8ee4b-959c-4780-9d97-f04be2ebd8e6
00000000-0000-0000-0000-000000000000,active,b6054e20-bcb5-445b-9e8d-3b872d88f3b4,subscription,quality,2023-01-16 06:02:48.163000+00:00,24a9b300-31e3-4f95-b49a-a6a9721d1d38
00000000-0000-0000-0000-000000000000,active,b6054e20-bcb5-445b-9e8d-3b872d88f3b4,subscription,mrm,2023-01-16 06:02:48.127000+00:00,e8c59710-b514-441c-aaf5-1e51e023942f


# Run MRM monitor

In [14]:
# Look up the ID of the MRM monitor instance declared by the flow.
mrm_monitor_instance_id = utils.get_resource_by_name(model_monitor.mrm_monitor.friendly_unique_id)["id"]

# Draw 500 rows with replacement as test data. The fixed seed (the same value used
# for the model) makes the uploaded sample reproducible across runs.
buffer = io.BytesIO()
df.sample(500, replace=True, random_state=12345).to_csv(buffer, index=False, mode="wb", encoding="utf-8")
file_name = "test_data_1.csv"
POST_EVALUATIONS_URL = f"{CPD_URL}/openscale/{DATA_MART_ID}/v2/monitoring_services/mrm/monitor_instances/{mrm_monitor_instance_id}/risk_evaluations?test_data_set_name={file_name}"
# Reuse the bearer token of the WML client for the OpenScale REST call.
headers = {"Content-Type": "text/csv", "Accept": "application/json", "Authorization": wml_client._get_headers()["Authorization"]}
# NOTE(review): verify=False disables TLS certificate verification; enable it when
# the cluster presents a certificate trusted by this environment.
response = requests.post(POST_EVALUATIONS_URL, data=buffer.getvalue(), headers=headers, verify=False)
response.text

'{"evaluation_id": "e4acc5ae-065f-496f-9c1f-ebf253692c0a", "evaluation_date": "2023-01-16T06:05:51.607000Z", "publish_metrics": "false", "evaluation_tests": ["drift", "fairness", "quality", "explainability"], "evaluation_start_time": "2023-01-16T06:05:52.826133Z", "status": {"state": "UPLOAD_IN_PROGRESS"}}\n'

# Clean up

In [15]:
# Uncomment to destroy the resources managed by the flow.
# model_monitor.destroy()

Destroying ...
[0m[1mibmcpd_model.crossdeploy_mortgagemodel1_075D9387: Refreshing state... [id=80f929ee-b609-4291-bad0-36fdc938e8d0][0m
[0m[1mibmcpd_service_provider.crossdeploy_serviceprovider1_B7A74882: Refreshing state... [id=f131298f-be64-471e-88f4-55c9488c8a82][0m
[0m[1mibmcpd_model.crossdeploy_promotedmodel1_509720E4: Refreshing state... [id=3ad9b3e5-f372-4751-be26-cbd6eb4d8fed][0m
[0m[1mibmcpd_deployment.crossdeploy_deployment1_E539DCE6: Refreshing state... [id=874fb24f-6291-4a6a-9cc6-f5cab22fbeaf][0m
[0m[1mibmcpd_subscription.crossdeploy_subscription1_A3423F9C: Refreshing state... [id=b6054e20-bcb5-445b-9e8d-3b872d88f3b4][0m
[0m[1mibmcpd_monitor_instance.crossdeploy_mrmmonitor_09DF8657: Refreshing state... [id=e8c59710-b514-441c-aaf5-1e51e023942f][0m
[0m[1mibmcpd_monitor_instance.crossdeploy_explainabilitymonitor_C90B4AD9: Refreshing state... [id=af0ab062-a662-4e65-9a16-5b56c94a678c][0m
[0m[1mibmcpd_monitor_instance.crossdeploy_driftmonitor_3089CFDB: Refr