# <img src="https://github.com/pmservice/ai-openscale-tutorials/raw/master/notebooks/images/banner.png" align="left" alt="banner">

# Monitor your ML Models using Watson OpenScale and WML on Cloud Pak for Data

## 1. Setup the Notebook Environment

## 1.1 Install the necessary packages

### Watson OpenScale Python SDK

In [None]:
!pip install ibm-ai-openscale

### Scikit-learn version 0.20


In [None]:
!pip install scikit-learn==0.20.3

### Watson Machine Learning Python SDK

In [None]:
!pip install --upgrade watson-machine-learning-client-V4==1.0.93 | tail -n 1

Restart the notebook kernel after installing the required packages by clicking `Kernel > Restart`.

## 1.2 Import Packages

In [None]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn import preprocessing
from sklearn import svm, metrics
from scipy import sparse
from watson_machine_learning_client import WatsonMachineLearningAPIClient
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
import json
import ibm_db


import pandas as pd
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV

from ibm_ai_openscale import APIClient4ICP
from ibm_ai_openscale.engines import *
from ibm_ai_openscale.utils import *
from ibm_ai_openscale.supporting_classes import PayloadRecord, Feature
from ibm_ai_openscale.supporting_classes.enums import *

## 2. Configuration

### 2.1 Global Variables

**<font color='red'> UPDATE THE VARIABLE 'dep_name' TO THE NAME OF THE DEPLOYMENT SPACE CREATED PREVIOUSLY</font>**

1. Right Click on the project name in the upper left section of the screen
2. Click on the tab where the project is opened
3. Click on Settings tab
4. Copy the `Associated deployment space` which we created in the previous lab tutorial for Watson Machine Learning
5. Paste the value in the `dep_name` variable

**<font color='red'> UPDATE THE VARIABLE 'MODEL_NAME' TO A UNIQUE NAME</font>**

In [None]:
# Names given to the stored model and to its online deployment.
# Change the suffix so that both are unique to you.
MODEL_NAME = "fraud_claim_classifier_srs"
DEPLOYMENT_NAME = "fraud_claim_monitoring_deployment_srs"

# Name of the deployment space that is associated with this project.
dep_name = "fraud_prediction_deployment_space_srs"

# Name of an empty database schema that you created beforehand;
# it is passed to the data mart setup.
SCHEMA_NAME = "INSURANCE"

### 2.2 Add Dataset

Select the code cell below, then choose the `Insert Pandas Dataframe` option for your dataset. Ensure the resulting variable is named `df_data_1`.

### 2.3 Update your AIOS Credentials

**<font color='red'> Add your `username` and `password`</font>**

In [None]:
# `os` is not explicitly imported until a later cell, so import it here to keep
# this cell runnable when the notebook is executed from top to bottom.
import os

# Credentials for the Watson OpenScale client.
# The URL is read from the runtime environment; replace the username and
# password placeholders with your own values.
WOS_CREDENTIALS = {
    "url": os.environ['RUNTIME_ENV_APSX_URL'],
    "username": "XXXXXXX",
    "password": "XXXXXX",
}

### 2.4 Input your WML Credentials 


In [None]:
import sys,os,os.path


# Alternative, token-based credentials (kept for reference, not used):
# WML_CREDENTIALS = {
# "token": os.environ['USER_ACCESS_TOKEN'],
# "instance_id" : "wml_local",
# "url" : os.environ['RUNTIME_ENV_APSX_URL'],
# "version": "3.0.0"
# }

# Build the WML credentials as a new dict: every entry of the OpenScale
# credentials plus the 'instance_id' and 'version' keys.
# WOS_CREDENTIALS itself is left untouched.
WML_CREDENTIALS = dict(WOS_CREDENTIALS, instance_id='openshift', version='3.0.0')

### 2.5 Add your Db credentials

#### These Db credentials are needed ONLY if you have NOT configured your `OpenScale Datamart`.

In [None]:

# Database credentials for the OpenScale data mart. Uncomment and fill these in
# ONLY if the data mart has not been configured yet; the data mart setup cell
# reads DATABASE_CREDENTIALS in that case.
# Do not keep real host names, user names or passwords in a shared notebook.
# DATABASE_CREDENTIALS = {
#     "hostname": "<db-hostname>",
#     "username": "<db-username>",
#     "password": "<db-password>",
#     "port": 50000,
#     "db": "BLUDB",
#
# }


## 3. Create the Fraud claims prediction Model using Scikit-Learn

In [None]:
# Columns kept for modelling: the predictors plus the 'fraud_reported' target.
required_columns = ['insured_sex', 'insured_occupation', 'insured_hobbies',
       'capital_gains', 'capital_loss', 'incident_type', 'collision_type', 'incident_severity',
       'authorities_contacted', 'incident_hour_of_the_day', 'number_of_vehicles_involved',
       'witnesses', 'total_claim_amount', 'fraud_reported', 'policy_annual_premium']

# Take an explicit copy: later cells modify df1 (filling missing values,
# recoding the target). Working on a bare selection of df_data_1 triggers
# pandas' SettingWithCopyWarning and the edits may not be applied reliably.
df1 = df_data_1[required_columns].copy()

#### Checking for missing values

In [None]:
df1.isnull().sum()

#### This step is required only if there are missing values in `insured_hobbies` (this was missed during the lab exercise). Otherwise you can skip this step.

In [None]:
# Replace missing hobbies with a fixed placeholder category.
# The result is assigned back to the column: calling fillna(inplace=True) on
# df1['insured_hobbies'] acts on an intermediate Series and is not guaranteed
# to change df1 itself.
df1['insured_hobbies'] = df1['insured_hobbies'].fillna('cross-fit')

# Show the remaining number of missing values per column.
df1.isnull().sum()

In [None]:
# Categorical predictors: every object-typed column except the target.
categorical_features = [
    col for col in df1.columns
    if col != 'fraud_reported' and df1[col].dtype == 'object'
]

# Numeric predictors. The target is removed explicitly: once it has been
# recoded to integers, re-running this cell would otherwise list it as a
# feature even though it is not part of X.
numeric_features = (
    df1.select_dtypes(include=['int64', 'float64'])
    .columns
    .drop('fraud_reported', errors='ignore')
)

# One-hot encoding happens inside the model pipeline, so the frame is used
# as-is. Note that df2 is the same DataFrame object as df1, not a copy.
df2 = df1

In [None]:
'''Add a categorical transformer to your model pipeline. 
    You will need to add a label encoder into the model pipeline before storing it into WML '''

# Numeric columns: fill gaps with the column median, then standardise.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical columns: fill gaps with the constant 'missing', then one-hot
# encode; categories unseen during fitting are ignored at transform time.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Route each group of columns to its own transformer.
column_routes = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_routes)

In [None]:
# Recode the target: 'Y' becomes '1', 'N' becomes '0', then cast to integer.
df2['fraud_reported'] = (
    df2['fraud_reported']
    .str.replace('Y', '1')
    .str.replace('N', '0')
    .astype(int)
)

In [None]:
# Name of the label column; every other column of df2 is a predictor.
target = 'fraud_reported'
features = [col for col in df2.columns if col != 'fraud_reported']

# Feature matrix and label vector.
X = df2[features]
y = df2[target]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Full model: the column preprocessor followed by a linear-kernel SVC.
# No separate scaling step is needed; numeric columns are standardised inside
# the preprocessor.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('clf', SVC(kernel='linear')),
])
pipeline.fit(X_train, y_train)

# Predict on the held-out split.
preds = pipeline.predict(X_test)

# scikit-learn metrics take (y_true, y_pred); use that order for both calls.
print(accuracy_score(y_test, preds))
print(classification_report(y_test, preds))

## 4. Create a new deployment

In [None]:
client = WatsonMachineLearningAPIClient(WML_CREDENTIALS)

In [None]:
# Metadata stored together with the model: its name plus runtime and type
# identifiers. Both identifiers name scikit-learn 0.20, in line with the
# scikit-learn==0.20.3 version installed at the top of this notebook.
meta_props={
 client.repository.ModelMetaNames.NAME: MODEL_NAME,
 client.repository.ModelMetaNames.RUNTIME_UID: "scikit-learn_0.20-py3.6",
 client.repository.ModelMetaNames.TYPE: "scikit-learn_0.20",
}

In [None]:

project_id = os.environ['PROJECT_ID']
client.set.default_project(project_id)

In [None]:
def guid_from_space_name(client, space_name):
    """Look up the GUID of the deployment space called ``space_name``.

    Args:
        client: WML client; only ``client.spaces.get_details()`` is called.
        space_name: Name of the deployment space to search for.

    Returns:
        The ``metadata.guid`` of the matching space. If several spaces share
        the name, the last one in the listing wins. If no space matches, an
        empty list is returned, so callers should check the result before
        using it.
    """
    # Empty list is the "not found" value, kept for backward compatibility.
    matched_guid = []
    for item in client.spaces.get_details()['resources']:
        if item['entity']['name'] == space_name:
            matched_guid = item['metadata']['guid']
    return matched_guid

In [None]:
# Resolve the deployment space named in `dep_name` to its GUID.
space_uid = guid_from_space_name(client, dep_name)

# The lookup yields an empty result when no space has that name. Stop here with
# a clear message instead of failing later when the space is selected.
if not space_uid:
    raise ValueError(
        "No deployment space named {!r} was found; check the value of dep_name.".format(dep_name)
    )

In [None]:
space_uid

In [None]:
client.set.default_space(space_uid)


### Store, Deploy and Score your Custom WML Model

In [None]:
deploy_meta = {
     client.deployments.ConfigurationMetaNames.NAME: DEPLOYMENT_NAME,
     client.deployments.ConfigurationMetaNames.ONLINE: {}
 }

In [None]:
## Store the model on WML
# Passes the fitted pipeline to the repository together with the metadata in
# meta_props; the training features and training labels are supplied as well.
published_model = client.repository.store_model(pipeline,
                                             meta_props=meta_props,
                                             training_data=X_train,
                                             training_target=y_train
                                                )


In [None]:
published_model_uid = client.repository.get_model_uid(published_model)

In [None]:
## Create a Deployment for your stored model

created_deployment = client.deployments.create(published_model_uid, meta_props=deploy_meta)

In [None]:

# Placeholder only; no later cell in this notebook section reads it.
scoring_endpoint = None
# GUID of the deployment created above; it is passed to the scoring calls.
deployment_uid=created_deployment['metadata']['guid']

## 5. Setup your Watson Openscale Dashboard 

### 5.1 Create the Watson Openscale Client

In [None]:
ai_client = APIClient4ICP(aios_credentials=WOS_CREDENTIALS)
ai_client.version

### 5.2 Setup the Datamart on AI OpenScale

In [None]:
# Reuse the data mart if its details can be fetched; otherwise set one up.
try:
    data_mart_details = ai_client.data_mart.get_details()
except Exception:
    # `except Exception` instead of a bare `except`, so that a keyboard or
    # kernel interrupt is not mistaken for "no data mart configured".
    # DATABASE_CREDENTIALS must be defined (section 2.5) for this branch to run.
    print('Setting up external datamart')
    ai_client.data_mart.setup(db_credentials=DATABASE_CREDENTIALS, schema=SCHEMA_NAME)
else:
    print('Using existing external datamart')

In [None]:
data_mart_details = ai_client.data_mart.get_details()

In [None]:
data_mart_details

### 5.3 Add your Machine Learning Provider

If you have already bound the ML Provider to the Openscale instance, then just retrieve the binding_uid, by commenting first line and uncommenting the second line

In [None]:
WML_CREDENTIALS

**<font color='red'> Add your initials to the instance name like `WML instance - srs`</font>**

In [None]:
binding_uid = ai_client.data_mart.bindings.add('WML instance - srs', WatsonMachineLearningInstance4ICP(wml_credentials=WML_CREDENTIALS))


In [None]:
ai_client.data_mart.bindings.list_assets()

### 5.4 Perform Initial Scoring for your Model Deployment


In [None]:
score=X_test.tail(20)
score

In [None]:
# Convert every column to a list of plain Python values, then transpose the
# columns into one list per row.
column_values = [score[name].values.tolist() for name in score.columns]
scoring_data = [list(row) for row in zip(*column_values)]
scoring_data

In [None]:
fields=list(X_test.columns)
print(len(fields))
fields, scoring_data[0]

In [None]:

job_payload = {
client.deployments.ScoringMetaNames.INPUT_DATA: [{
 'values': scoring_data
}]
}
print(job_payload)

In [None]:
scoring_response = client.deployments.score(deployment_uid, job_payload)

print(scoring_response)

### 5.5 Create a new Subscription 

In [None]:
# Subscribe the stored model to the OpenScale data mart.
subscription = ai_client.data_mart.subscriptions.add(WatsonMachineLearningAsset(
    published_model_uid,
    # The model is declared as a binary classifier over structured data.
    problem_type=ProblemType.BINARY_CLASSIFICATION,
    input_data_type=InputDataType.STRUCTURED,
    # Names of the label column and of the prediction/probability output columns.
    label_column='fraud_reported',
    prediction_column='prediction',
    probability_column='prediction_probability',
    # Object-typed predictors, and the full list of predictors used for training.
    categorical_columns = categorical_features,
    feature_columns = list(X_train.columns.values),
#     feature_columns = list(numeric_features.values),
))

In [None]:
subscriptions_uids = ai_client.data_mart.subscriptions.get_uids()
ai_client.data_mart.subscriptions.list()

### 5.6 Perform Initial Payload Logging
Note: After finishing the initial dashboard setup, you may re-use this code snippet to log further payload by modifying the `request_data` variable.

In [None]:
fields=list(X_test.columns)

request_data = {
    "fields": fields,
    "values": scoring_data
  }
request_data

**<font color='red'><< REPLACE subscription_uid BELOW with the uid for your subscription. For e.g.<br/>subscription_uid="644e4e6d-8a82-4f07-9489-381d44469a23" >></font>**

In [None]:
## From the output of the above table choose your model name and copy the uid against it. Store the uid in the subscription_uid variable


# NOTE: this hard-coded uid is an example value; replace it with your own.
subscription_uid="0ac145d5-f898-4e73-a5b5-a12294824099"
from ibm_ai_openscale import APIClient4ICP
from ibm_ai_openscale.supporting_classes import PayloadRecord


# Re-fetch the subscription by uid; this rebinds the `subscription` variable.
subscription = ai_client.data_mart.subscriptions.get(subscription_uid=subscription_uid)


# Two payload records built from the same request and scoring response;
# they differ only in the reported response_time.
records = [PayloadRecord(request=request_data, response=scoring_response, response_time=18), 
                PayloadRecord(request=request_data, response=scoring_response, response_time=12)]

# Send the records to the subscription's payload log.
subscription.payload_logging.store(records=records)

### 5.7 Setup Quality Monitoring

```NOTE: If you are using the dataset provided in the workshop, leave the threshold monitors to these values. However, if you are using your own dataset, you can play around with the threshold value (value b/w 0 and 1) according to your requirement.```

In [None]:
# `time` is not explicitly imported anywhere earlier in this notebook, so
# import it here to keep the cell self-contained.
import time

# Short pause before enabling the quality monitor.
time.sleep(5)

# Enable quality monitoring with an alert threshold of 0.90 and min_records=5.
subscription.quality_monitoring.enable(threshold=0.90, min_records=5)

### 5.8 Log Feedback Data to your Subscription

In [None]:
feedback_data_raw=pd.concat([X_test,y_test],axis=1)
feedback_data_raw

In [None]:
feedback_data=list(map(list, feedback_data_raw.tail(100).itertuples(index=False)))
feedback_data

In [None]:
# feedback_data=feedback_data_raw.tail(20).values.tolist()
# feedback_data

In [None]:
feedback_scoring={
    "data":feedback_data
}

In [None]:
subscription.feedback_logging.store(feedback_scoring['data'])


In [None]:
subscription.feedback_logging.show_table()

#### Run an initial quality test

In [None]:
run_details = subscription.quality_monitoring.run(background_mode=False)

In [None]:
subscription.quality_monitoring.show_table()

In [None]:
%matplotlib inline

quality_pd = subscription.quality_monitoring.get_table_content(format='pandas')
quality_pd.plot.barh(x='id', y='value');

### 5.9 Setup the Fairness Monitors

The code below configures fairness monitoring for our model. It turns on monitoring for one feature, gender of the insured. In each case, we must specify:
  * Which model feature to monitor
  * One or more **majority** groups, which are values of that feature that we expect to receive a higher percentage of favorable outcomes
  * One or more **minority** groups, which are values of that feature that we expect to receive a higher percentage of unfavorable outcomes
  * The threshold at which we would like OpenScale to display an alert if the fairness measurement falls below (in this case, 95%)

Additionally, we must specify which outcomes from the model are favourable outcomes, and which are unfavourable. We must also provide the number of records OpenScale will use to calculate the fairness score. In this case, OpenScale's fairness monitor will run hourly, but will not calculate a new fairness rating until at least 50 records have been added. Finally, to calculate fairness, OpenScale must perform some calculations on the training data, so we provide the dataframe containing the data.

In [None]:
# Enable fairness monitoring on the 'insured_sex' feature.
# NOTE(review): training_data is the raw df_data_1, while the model was trained
# on df2, where 'fraud_reported' was recoded to 1/0. df_data_1 presumably still
# holds 'Y'/'N' labels - confirm they are compatible with the "1"/"0" classes
# given below.
subscription.fairness_monitoring.enable(
            features=[
                # MALE is the majority group, FEMALE the minority group; alert threshold 0.95.
                Feature("insured_sex", majority=['MALE'], minority=['FEMALE'], threshold=0.95),
            ],
            favourable_classes=["1"],
            unfavourable_classes=["0"],
            min_records=50,
            training_data=df_data_1
        )

In [None]:
# Score the first 200 test rows and log the request/response pair as payload.
score2 = X_test.head(200)

# One list of plain Python values per row.
scoring_data2 = [list(row) for row in zip(*(score2[col].values.tolist() for col in score2.columns))]

fields2 = list(X_test.columns)

job_payload2 = {
    client.deployments.ScoringMetaNames.INPUT_DATA: [{
        'values': scoring_data2
    }]
}

scoring_response2 = client.deployments.score(deployment_uid, job_payload2)

# Use fields2, defined in this cell, rather than `fields` from an earlier cell,
# so this cell does not depend on that other cell having been run.
request_data2 = {
    "fields": fields2,
    "values": scoring_data2
}

# Two records with the same request/response, differing only in response_time.
records2 = [
    PayloadRecord(request=request_data2, response=scoring_response2, response_time=18),
    PayloadRecord(request=request_data2, response=scoring_response2, response_time=12),
]

subscription.payload_logging.store(records=records2)

In [None]:
# `time` is not explicitly imported anywhere earlier in this notebook, so
# import it here to keep the cell self-contained.
import time

# Wait 200 seconds before triggering the fairness run
# (presumably so the payload logged above is available - TODO confirm).
time.sleep(200)

run_details = subscription.fairness_monitoring.run(background_mode=False)

In [None]:
# `time` is not explicitly imported anywhere earlier in this notebook, so
# import it here to keep the cell self-contained.
import time

# Short pause before displaying the fairness results.
time.sleep(5)

subscription.fairness_monitoring.show_table()

## 6.0 Custom monitors and metrics


### 6.1 Register custom monitor

In [None]:
def get_definition(monitor_name):
    """Return the monitor definition named ``monitor_name``, or None if absent.

    The definitions are read from ``ai_client.data_mart.monitors.get_details()``
    and the first one whose ``entity.name`` equals ``monitor_name`` is returned.
    """
    all_definitions = ai_client.data_mart.monitors.get_details()['monitor_definitions']
    matching = (entry for entry in all_definitions if monitor_name == entry['entity']['name'])
    return next(matching, None)

Change `monitor_name` to something unique.

In [None]:
from ibm_ai_openscale.supporting_classes import Metric, Tag

monitor_name = 'custom_monitor_shivam'

# Two custom metrics, each with a default lower limit, and one tag.
metrics = [
    Metric(name='sensitivity', lower_limit_default=0.8),
    Metric(name='specificity', lower_limit_default=0.75),
]
tags = [Tag(name='region', description='customer geographical region')]

# Reuse the definition if a monitor with this name exists; register it otherwise.
existing_definition = get_definition(monitor_name)
my_monitor = (
    existing_definition
    if existing_definition is not None
    else ai_client.data_mart.monitors.add(name=monitor_name, metrics=metrics, tags=tags)
)

### 6.1.1 Get monitors uids and details

In [None]:
monitor_uid = my_monitor['metadata']['guid']

print(monitor_uid)

In [None]:
my_monitor = ai_client.data_mart.monitors.get_details(monitor_uid=monitor_uid)
print('monitor definition details', my_monitor)

### 6.2 Enable custom monitor for subscription

In [None]:
from ibm_ai_openscale.supporting_classes import Threshold

thresholds = [Threshold(metric_uid='sensitivity', lower_limit=0.9)]
subscription.monitoring.enable(monitor_uid=monitor_uid, thresholds=thresholds)

### 6.2.1 Get monitor configuration details

In [None]:
subscription.monitoring.get_details(monitor_uid=monitor_uid)

### 6.3 Storing custom metrics

In [None]:
metrics = {"specificity": 0.78, "sensitivity": 0.67, "region": "us-south"}

subscription.monitoring.store_metrics(monitor_uid=monitor_uid, metrics=metrics)

### 6.3.1 List and get custom metrics

In [None]:
subscription.monitoring.show_table(monitor_uid=monitor_uid)

In [None]:
# Query the stored custom metrics for the deployment created in this notebook,
# using its GUID rather than a hard-coded placeholder string.
custom_metrics = subscription.monitoring.get_metrics(monitor_uid=monitor_uid, deployment_uid=deployment_uid)
custom_metrics

In [None]:
custom_metrics_pandas = subscription.monitoring.get_table_content(monitor_uid=monitor_uid)

%matplotlib inline
custom_metrics_pandas.plot.barh(x='id', y='value');

## 7.0 Payload analytics


### 7.1 Run data distributions calculation

In [None]:
from datetime import datetime

# Time window: from a fixed start date up to the current UTC time.
start_date = "2018-01-01T00:00:00.00Z"
end_date = "{}Z".format(datetime.utcnow().isoformat())

# Count payload records grouped by prediction and by insured_sex.
distribution_api = subscription.payload_logging.data_distribution
sex_distribution = distribution_api.run(
    start_date=start_date,
    end_date=end_date,
    group=['prediction', 'insured_sex'],
    agg=['count'],
)

### 7.2 Get data distributions as pandas dataframe


In [None]:
sex_distribution_run_uid = sex_distribution['id']
distributions_pd = subscription.payload_logging.data_distribution.get_run_result(run_id=sex_distribution_run_uid, format='pandas')
distributions_pd

In [None]:
subscription.payload_logging.data_distribution.show_chart(sex_distribution_run_uid);

## 8. Identify transactions for Explainability

In [None]:
from ibm_ai_openscale.supporting_classes import *
subscription.explainability.enable(training_data=df_data_1)

In [None]:
payload_data = subscription.payload_logging.get_table_content(limit=60)
payload_data.filter(items=['scoring_id', 'predictedLabel', 'probability'])

### Add some more Payload (Optional for populating your dashboard)

If you wish to add more payload data, take different sections of your test dataset and send them to OpenScale as shown below:

In [None]:
score100=X_test.head(100)
score100

In [None]:
scoring_data100=list(list(x) for x in zip(*(score100[x].values.tolist() for x in score100.columns)))
scoring_data100

In [None]:
fields=list(X_test.columns)
print(len(fields))
fields, scoring_data100[0]

In [None]:
request_data100 = {
    "fields": fields,
    "values": scoring_data100
  }
request_data100

In [None]:
# Score these 100 rows first so that the logged response belongs to the logged
# request. Reusing `scoring_response` would pair them with the predictions that
# were returned for a different, 20-row request.
scoring_response100 = client.deployments.score(
    deployment_uid,
    {client.deployments.ScoringMetaNames.INPUT_DATA: [{'values': scoring_data100}]},
)

# Two records with the same request/response, differing only in response_time.
records100 = [
    PayloadRecord(request=request_data100, response=scoring_response100, response_time=18),
    PayloadRecord(request=request_data100, response=scoring_response100, response_time=12),
]

subscription.payload_logging.store(records=records100)