How to generate counterfactuals for a model with Responsible AI (Part 8) 
https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/how-to-generate-counterfactuals-for-a-model-with-responsible-ai/ba-p/3803934#:~:text=The%20Responsible%20AI%20%28RAI%29%20dashboard%20provides%20the%20Counterfactual,change%20to%20get%20opposite%20or%20desired%20model%20prediction.

Generate Responsible AI insights with YAML and Python
https://learn.microsoft.com/en-us/azure/machine-learning/how-to-responsible-ai-insights-sdk-cli?view=azureml-api-2&tabs=yaml


In [1]:
#%pip install -U azure.ai.ml mltable

In [2]:
import os
import time
from dotenv import load_dotenv
import pandas as pd

import json

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

from azure.ai.ml import load_component
from azure.ai.ml import dsl, Input

from azure.ai.ml.entities import PipelineJob
from IPython.core.display import HTML
from IPython.display import display

In [3]:
# Read the workspace coordinates from the environment (after load_dotenv()).
load_dotenv()
SUBSCRIPTION_ID = os.getenv("SUBSCRIPTION_ID")
RESOURCE_GROUP = os.getenv("RESOURCE_GROUP")
AML_WORKSPACE_NAME = os.getenv("AML_WORKSPACE_NAME")
print("AML_WORKSPACE_NAME:", AML_WORKSPACE_NAME)

# Workspace-scoped client; it is used later to submit and poll the pipeline job.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=AML_WORKSPACE_NAME,
)

AML_WORKSPACE_NAME: aauki_eli_demos_amlws01


RAI registry

In [4]:
# Compute target the pipeline below runs on.
compute_name = "rai-cluster"
# NOTE(review): version_string is not referenced in the visible cells.
version_string = "1"

# Registry-scoped client: the built-in RAI components are fetched from the
# "azureml" registry rather than from the workspace.
registry_name = "azureml"
ml_client_registry = MLClient(
    registry_name=registry_name,
    credential=credential,
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
)
print(ml_client_registry)

MLClient(credential=<azure.identity._credentials.default.DefaultAzureCredential object at 0x7f9460040040>,
         subscription_id=6c6683e9-e5fe-4038-8519-ce6ebec2ba15,
         resource_group_name=registry-builtin-prod-eastus-01,
         workspace_name=None)


In [5]:
label = "latest"

# The constructor is resolved through the "latest" label; the version it
# reports is then reused so every RAI component comes from the same release.
rai_constructor_component = ml_client_registry.components.get(
    name="microsoft_azureml_rai_tabular_insight_constructor",
    label=label,
)
version = rai_constructor_component.version
print("The current version of RAI built-in components is: " + version)


def _fetch_rai_component(component_name):
    """Return `component_name` from the registry at the pinned `version`."""
    return ml_client_registry.components.get(name=component_name, version=version)


rai_explanation_component = _fetch_rai_component(
    "microsoft_azureml_rai_tabular_explanation"
)
rai_causal_component = _fetch_rai_component(
    "microsoft_azureml_rai_tabular_causal"
)
rai_counterfactual_component = _fetch_rai_component(
    "microsoft_azureml_rai_tabular_counterfactual"
)
rai_erroranalysis_component = _fetch_rai_component(
    "microsoft_azureml_rai_tabular_erroranalysis"
)
rai_gather_component = _fetch_rai_component(
    "microsoft_azureml_rai_tabular_insight_gather"
)
rai_scorecard_component = _fetch_rai_component(
    "microsoft_azureml_rai_tabular_score_card"
)

The current version of RAI built-in components is: 0.14.0


In [6]:
# Base model name plus a second-resolution timestamp suffix, so that each run
# of the notebook produces a distinct model name.
model_name = "us_health_regression_py"
model_name_suffix = int(time.time())

# "<name>_<suffix>:1" — the trailing ":1" is presumably the version of the
# freshly registered model; confirm against the training component.
expected_model_id = "{}_{}:1".format(model_name, model_name_suffix)
azureml_model_id = "azureml:" + expected_model_id

In [7]:
# Project-local components, loaded from their YAML specs (paths are relative
# to the notebook's working directory).
causal_discovery_component = load_component(
    source="../components/causal_discovery/causal_discovery.yaml"
)
linear_regression_component = load_component(
    source="../components/linear_regression/linear_regression.yaml"
)

In [8]:

# Column passed to the pipeline as the target column name.
target_feature = "STROKE_AgeAdjPrv"

# Value handed to the custom components as their `exclude_features` input.
# Alternative that excludes the string and integer columns (e.g. TotalPopulation):
# exclude_features = '"LocationID","LocationName","StateAbbr","StateDesc","STROKE_CrdPrv","TotalPopulation"'
exclude_features = "NA"
# categorical_features = json.dumps(["LocationID","LocationName","StateAbbr","StateDesc"])

# Measure descriptions kept from the original notes:
#   - Binge drinking among adults aged >=18 years
#   - Current smoking among adults aged >=18 years
#   - Sleeping less than 7 hours among adults aged >=18 years
#   - No leisure-time physical activity among adults aged >=18 years
# The binge-drinking columns ['BING_CrdPrv', 'BING_AgeAdjPrv'] are currently
# left out of the list below.

# JSON-encoded list; used below both as the causal treatment features and as
# the counterfactual features to vary.
treatment_features = json.dumps(
    [
        "CSMOKING_CrdPrv",
        "LPA_CrdPrv",
        "SLEEP_CrdPrv",
        "CSMOKING_AgeAdjPrv",
        "LPA_AgeAdjPrv",
        "SLEEP_AgeAdjPrv",
    ]
)

# Registered MLTable data assets for training and testing.
health_train = Input(
    type="mltable",
    path="azureml:health_train_mltable:03",
    mode="download",
)
health_test = Input(
    type="mltable",
    path="azureml:health_test_mltable:03",
    mode="download",
)


# Pipeline definition for the US health Responsible AI example.
#
# Steps wired together below:
#   1. causal discovery on the training data (custom component)
#   2. linear regression training (custom component)
#   3. RAI insights constructor, fed with the trained model output
#   4. explanation, error analysis, causal and counterfactual insights
#   5. gather step combining the constructor output with all four insights
#
# Parameters:
#   target_column_name: name of the column the model predicts.
#   training_data / test_data: pipeline inputs passed to the training and
#       constructor components (the caller supplies MLTable inputs).
#
# Returns an empty dict, i.e. no pipeline-level outputs are exposed.
#
# NOTE(review): local variable names are intentionally left untouched; the
# pipeline DSL appears to derive node names from them — confirm before renaming.
@dsl.pipeline(
    compute=compute_name,
    description="Register Model for RAI Health example",
    experiment_name=f"RAI_Health_Example_PY_{model_name_suffix}",
)
def rai_demo_pipeline(target_column_name, training_data, test_data):
    # The result is not consumed by later steps; calling the component is
    # what adds the causal discovery node to the pipeline.
    causal_discovery_job = causal_discovery_component(
        data_mltable=training_data,
        exclude_features=exclude_features,
    )

    trained_model = linear_regression_component(
        target_column_name=target_column_name,
        training_data=training_data,
        test_data=test_data,
        exclude_features=exclude_features,
        model_base_name=f"{model_name}_{model_name_suffix}",
    )
    trained_model.set_limits(timeout=7200)

    # Initiate the RAIInsights
    create_rai_job = rai_constructor_component(
        title="RAI Dashboard Example",
        # title = trained_model.outputs.model_output,
        task_type="regression",
        model_info=expected_model_id,
        model_input=trained_model.outputs.model_output,
        train_dataset=training_data,
        test_dataset=test_data,
        target_column_name=target_column_name,
        # categorical_column_names=json.dumps(categorical_features),
        # categorical_column_names=categorical_features,
        use_model_dependency=True,
    )
    create_rai_job.set_limits(timeout=7200)

    # Add an explanation
    explain_job = rai_explanation_component(
        # Fixed: the comment previously referred to the housing dataset,
        # which this pipeline does not use.
        comment="Explanation for the US health dataset",
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
    )
    explain_job.set_limits(timeout=7200)

    # Add error analysis
    erroranalysis_job = rai_erroranalysis_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
    )
    erroranalysis_job.set_limits(timeout=7200)

    # Add causal analysis
    causal_job = rai_causal_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        treatment_features=treatment_features,
    )
    causal_job.set_limits(timeout=7200)

    # Add counterfactual analysis
    counterfactual_job = rai_counterfactual_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        total_cfs=10,
        desired_range="[0, 0.50]",
        features_to_vary=treatment_features,
    )
    counterfactual_job.set_limits(timeout=7200)

    # Combine everything
    rai_gather_job = rai_gather_component(
        constructor=create_rai_job.outputs.rai_insights_dashboard,
        insight_1=explain_job.outputs.explanation,
        insight_2=causal_job.outputs.causal,
        insight_3=counterfactual_job.outputs.counterfactual,
        insight_4=erroranalysis_job.outputs.error_analysis,
    )
    rai_gather_job.set_limits(timeout=7200)

    rai_gather_job.outputs.dashboard.mode = "upload"
    rai_gather_job.outputs.ux_json.mode = "upload"

    # Pipeline-level outputs are currently disabled. The previous return value
    # is kept for reference:
    # return {"dashboard": rai_gather_job.outputs.dashboard, "ux_json": rai_gather_job.outputs.ux_json}
    return {}

# Instantiate the pipeline with the target column and the train/test inputs.
rai_demo_pipeline_job = rai_demo_pipeline(
    target_column_name=target_feature,
    training_data=health_train,
    test_data=health_test,
)

In [9]:
from azure.ai.ml.entities import PipelineJob
from IPython.core.display import HTML
from IPython.display import display


def submit_and_wait(ml_client, pipeline_job, poll_interval: float = 30) -> PipelineJob:
    """Submit a pipeline job and block until it reaches a terminal status.

    The job is created with ``ml_client.jobs.create_or_update``, a link to its
    studio page is displayed, and the job is then re-fetched with
    ``ml_client.jobs.get`` every ``poll_interval`` seconds, printing each
    status, until the status is one of "Completed", "Failed", "Canceled" or
    "NotResponding".

    Args:
        ml_client: Client whose ``jobs`` operations are used to submit and
            poll the job.
        pipeline_job: The pipeline job to submit.
        poll_interval: Seconds to sleep between status checks (default 30,
            matching the previous hard-coded value).

    Returns:
        The job object as last fetched, with status "Completed".

    Raises:
        AssertionError: If job creation returns ``None`` or the job ends in
            any status other than "Completed".
    """
    created_job = ml_client.jobs.create_or_update(pipeline_job)
    assert created_job is not None, "jobs.create_or_update returned no job"

    print("Pipeline job can be accessed in the following URL:")
    display(HTML('<a href="{0}">{0}</a>'.format(created_job.studio_url)))

    # Statuses after which polling stops.
    terminal_states = ("Completed", "Failed", "Canceled", "NotResponding")
    while created_job.status not in terminal_states:
        time.sleep(poll_interval)
        created_job = ml_client.jobs.get(created_job.name)
        print("Latest status : {0}".format(created_job.status))

    # Include the job name and final status so a failed run is diagnosable
    # from the traceback alone.
    assert created_job.status == "Completed", (
        f"Pipeline job {created_job.name} ended with status "
        f"{created_job.status!r} instead of 'Completed'"
    )
    return created_job


# Example submission (left disabled here; the actual call is made in the next cell):
#training_job = submit_and_wait(ml_client, rai_demo_pipeline_job)

In [10]:
# Submit the RAI pipeline and block until it finishes.
# Fire-and-forget alternative (no polling):
# created_job = ml_client.jobs.create_or_update(rai_demo_pipeline_job)
created_job = submit_and_wait(
    ml_client=ml_client,
    pipeline_job=rai_demo_pipeline_job,
)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading causal_discovery (0.01 

Pipeline job can be accessed in the following URL:


Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : 