# Esta notebook prepara el JSON de entrada para crear un monitor

In [10]:
!pip install --upgrade ibm-watson-openscale

Collecting ibm-watson-openscale
  Downloading ibm_watson_openscale-3.0.14-py3-none-any.whl (173 kB)
[K     |████████████████████████████████| 173 kB 18.1 MB/s eta 0:00:01
Installing collected packages: ibm-watson-openscale
  Attempting uninstall: ibm-watson-openscale
    Found existing installation: ibm-watson-openscale 3.0.13
    Uninstalling ibm-watson-openscale-3.0.13:
      Successfully uninstalled ibm-watson-openscale-3.0.13
Successfully installed ibm-watson-openscale-3.0.14


In [1]:
import pandas as pd
# IBM libraries
from ibm_watson_studio_lib import access_project_or_space
# Project/space handle; used further down to load the training CSV via wslib.load_data(...)
wslib = access_project_or_space()

In [8]:
# Fetch the CSV through the project handle first, then parse it into the
# training DataFrame used by the statistics and drift cells.
melb_csv = wslib.load_data("melb_data.csv")
data_df = pd.read_csv(melb_csv)

In [3]:
# Switches read by the later cells to decide which artefacts to generate
# (training statistics for fairness/explainability, drift archive).
service_configuration_support = dict(
    enable_fairness=False,
    enable_explainability=True,
    enable_drift=True,
)

In [4]:
# Column roles in the training data: the label, the feature columns, and which
# of the features are categorical (none here).
# NOTE(review): the 'Lattitude' / 'Longtitude' spellings presumably mirror the
# CSV headers -- confirm against the data before "correcting" them.
training_data_info = dict(
    class_label="Price",
    feature_columns=[
        "Rooms",
        "Bathroom",
        "Landsize",
        "Lattitude",
        "Longtitude",
    ],
    categorical_columns=[],
)

In [5]:
# Problem type; passed as "problem_type" to both the training-statistics input
# and the drift-detection input, and compared against "regression" in the drift cell.
model_type = "regression"

In [6]:
# min_records = <Minimum number of records to be considered for performing scoring>
# Only placed into the fairness inputs, i.e. only used when fairness is enabled.
min_records = 50
# max_records = <Maximum number of records to be considered while computing fairness> [OPTIONAL]
# Left as None to omit "max_records" from the fairness inputs.
max_records = None

In [10]:
from ibm_watson_openscale.utils.training_stats import TrainingStats
VERSION = "5.3.2"

# Service switches; the download-link cell reads these two names as well.
enable_explainability = service_configuration_support.get("enable_explainability")
enable_fairness = service_configuration_support.get("enable_fairness")

# Start from a known value so that re-running this cell with both services
# disabled cannot leave a stale result from an earlier execution in the kernel.
config_json = None

if enable_explainability or enable_fairness:
    fairness_inputs = None
    if enable_fairness:
        # The fairness settings are not defined by this notebook's configuration
        # cells. Fail up front with an actionable message (same exception type
        # as before) instead of a bare NameError in the middle of the dict literal.
        missing_names = [
            name for name in ("fairness_attributes", "parameters")
            if name not in globals()
        ]
        if missing_names:
            raise NameError(
                "enable_fairness is True but these variables are not defined: "
                + ", ".join(missing_names)
                + ". Define them in a configuration cell before running this cell."
            )
        fairness_inputs = {
            "fairness_attributes": fairness_attributes,
            "min_records": min_records,
            "favourable_class": parameters["favourable_class"],
            "unfavourable_class": parameters["unfavourable_class"]
        }
        # max_records is optional: only forwarded when explicitly configured
        if max_records is not None:
            fairness_inputs["max_records"] = max_records

    input_parameters = {
        "label_column": training_data_info["class_label"],
        "feature_columns": training_data_info["feature_columns"],
        "categorical_columns": training_data_info["categorical_columns"],
        "fairness_inputs": fairness_inputs,
        "problem_type": model_type
    }

    training_stats = TrainingStats(data_df, input_parameters, explain=enable_explainability, fairness=enable_fairness, drop_na=True)
    config_json = training_stats.get_training_statistics()
    # Stamp the generated statistics with this notebook's version
    config_json["notebook_version"] = VERSION

In [11]:
import json

print("Finished generating training distribution data")

# Create a file download link
import base64
from IPython.display import HTML

def create_download_link(title="Download training data distribution JSON file", filename="training_distribution.json"):
    """Build an HTML download link that embeds the training statistics as JSON.

    Reads the notebook globals ``enable_explainability``, ``enable_fairness``
    and ``config_json``.

    Args:
        title: Text shown for the link.
        filename: File name placed in the link's ``download`` attribute.

    Returns:
        The ``HTML`` display object wrapping the ``<a>`` element, or ``None``
        (after printing a notice) when both services are disabled.
    """
    from html import escape

    if not (enable_explainability or enable_fairness):
        print("No download link generated as fairness/explainability services are disabled.")
        return None

    output_json = json.dumps(config_json, indent=2)
    payload = base64.b64encode(output_json.encode()).decode()

    # application/json is the registered media type for JSON; title and
    # filename are escaped so quotes or angle brackets cannot break the markup.
    link_template = '<a download="{filename}" href="data:application/json;base64,{payload}" target="_blank">{title}</a>'
    link_markup = link_template.format(
        payload=payload,
        title=escape(str(title)),
        filename=escape(str(filename)),
    )
    return HTML(link_markup)

create_download_link()

Finished generating training distribution data


In [14]:
!pip install ibm-wos-utils

Collecting ibm-wos-utils
  Downloading ibm_wos_utils-4.0.34-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (10.1 MB)
[K     |████████████████████████████████| 10.1 MB 25.1 MB/s eta 0:00:01
Collecting scikit-learn==0.24.2
  Downloading scikit_learn-0.24.2-cp38-cp38-manylinux2010_x86_64.whl (24.9 MB)
[K     |████████████████████████████████| 24.9 MB 83.9 MB/s eta 0:00:01
Collecting jenkspy
  Downloading jenkspy-0.2.0.tar.gz (55 kB)
[K     |████████████████████████████████| 55 kB 5.1 MB/s  eta 0:00:01
Building wheels for collected packages: jenkspy
  Building wheel for jenkspy (setup.py) ... [?25ldone
[?25h  Created wheel for jenkspy: filename=jenkspy-0.2.0-cp38-cp38-linux_x86_64.whl size=79170 sha256=e1b3e3e9af10d931f384ef465770314add1f35b6cd23b1823a0861c33f1621c7
  Stored in directory: /tmp/1000650000/.cache/pip/wheels/92/c2/4f/4f1d3c2fc01225f2a082ceb80611e2b2fb10cb8fb184d78d91
Successfully built jenkspy
Installing collected packages

In [16]:

# Generate drift detection model
from ibm_wos_utils.drift.drift_trainer import DriftTrainer

# Service switch; the drift download-link cell reads this name as well.
enable_drift = service_configuration_support.get("enable_drift")
if enable_drift:
    drift_detection_input = {
        "feature_columns": training_data_info.get("feature_columns"),
        "categorical_columns": training_data_info.get("categorical_columns"),
        "label_column": training_data_info.get("class_label"),
        "problem_type": model_type
    }

    drift_trainer = DriftTrainer(data_df, drift_detection_input)
    if model_type != "regression":
        # A scoring function is required on this path, but no cell in this
        # notebook defines one. Fail with an actionable message (same exception
        # type as before) instead of a bare NameError.
        if "score" not in globals():
            raise NameError(
                "model_type is {!r}, so a `score` function must be defined "
                "before generating the drift detection model.".format(model_type)
            )
        # Note: batch_size can be customized by user as per the training data size
        drift_trainer.generate_drift_detection_model(score, batch_size=data_df.shape[0], check_for_ddm_quality=False)

    drift_trainer.learn_constraints(
        two_column_learner_limit=200, categorical_unique_threshold=0.8, user_overrides=[])
    # NOTE(review): presumably writes the archive that the download-link cell
    # opens (drift_detection_model.tar.gz by default) -- confirm.
    drift_trainer.create_archive()

Computing feature stats...: 100%|██████████| 5/5 [00:00<00:00, 98.42features/s]
Learning single feature constraints...: 100%|██████████| 8/8 [00:00<00:00, 271.48constraints/s]
Learning two feature constraints...: 100%|██████████| 13/13 [00:00<00:00, 56.11constraints/s]


In [17]:
from IPython.display import HTML
import base64
import io

def create_download_link_for_ddm(title="Download Drift detection model", filename="drift_detection_model.tar.gz"):
    """Build an HTML download link that embeds the drift archive file.

    Reads the notebook global ``enable_drift``.

    Args:
        title: Text shown for the link.
        filename: Path of the archive to read and embed; its base name is
            placed in the link's ``download`` attribute.

    Returns:
        The ``HTML`` display object wrapping the ``<a>`` element, or ``None``
        (after printing a notice) when drift detection is disabled.

    Raises:
        OSError: If ``filename`` cannot be opened for reading.
    """
    import os
    from html import escape

    if not enable_drift:
        print("Drift Detection is not enabled. Please enable and rerun the notebook")
        return None

    with open(filename, "rb") as archive:
        payload = base64.b64encode(archive.read()).decode()

    # The default artefact is a .tar.gz archive, so advertise a gzip media type
    # rather than JSON. Only the base name goes into the download attribute:
    # it is a suggested file name, not a path.
    link_template = '<a download="{filename}" href="data:application/gzip;base64,{payload}" target="_blank">{title}</a>'
    link_markup = link_template.format(
        payload=payload,
        title=escape(str(title)),
        filename=escape(str(os.path.basename(os.fspath(filename)))),
    )
    return HTML(link_markup)

create_download_link_for_ddm()