In [9]:
import os
import time
from dotenv import load_dotenv

import json
from azure.ai.ml import Input

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

from azure.ai.ml import load_component
from azure.ai.ml import dsl, Input


In [10]:
# Read the workspace coordinates from a local .env file / the process environment.
load_dotenv()
SUBSCRIPTION_ID = os.environ.get("SUBSCRIPTION_ID")
RESOURCE_GROUP = os.environ.get("RESOURCE_GROUP")
AML_WORKSPACE_NAME = os.environ.get("AML_WORKSPACE_NAME")

# Fail fast with an explicit message: os.environ.get() returns None for an unset
# variable, and passing None on to MLClient only fails later and less clearly.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("SUBSCRIPTION_ID", SUBSCRIPTION_ID),
        ("RESOURCE_GROUP", RESOURCE_GROUP),
        ("AML_WORKSPACE_NAME", AML_WORKSPACE_NAME),
    )
    if not setting_value
]
if _missing_settings:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )
print("AML_WORKSPACE_NAME:", AML_WORKSPACE_NAME)

# Workspace-scoped client; it is used to submit the pipeline job.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=AML_WORKSPACE_NAME,
)

AML_WORKSPACE_NAME: aauki_eli_demos_amlws01


RAI registry

In [11]:
# Registry-scoped client: the built-in RAI components are fetched from the
# "azureml" registry rather than from the workspace.
registry_name = "azureml"
version_string = "1"
compute_name = "rai-cluster"  # compute target the pipeline decorator refers to

_registry_client_options = dict(
    credential=credential,
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
    registry_name=registry_name,
)
ml_client_registry = MLClient(**_registry_client_options)
print(ml_client_registry)

MLClient(credential=<azure.identity._credentials.default.DefaultAzureCredential object at 0x7fd25c5dd930>,
         subscription_id=6c6683e9-e5fe-4038-8519-ce6ebec2ba15,
         resource_group_name=registry-builtin-prod-eastus-01,
         workspace_name=None)


In [12]:
label = "latest"

# Resolve the constructor by label first; its version is then reused so that
# every RAI component comes from the same release.
rai_constructor_component = ml_client_registry.components.get(
    name="microsoft_azureml_rai_tabular_insight_constructor",
    label=label,
)
version = rai_constructor_component.version
print("The current version of RAI built-in components is: " + version)


def _get_rai_component(component_name):
    """Fetch `component_name` from the registry, pinned to the constructor's version."""
    return ml_client_registry.components.get(name=component_name, version=version)


rai_explanation_component = _get_rai_component("microsoft_azureml_rai_tabular_explanation")
rai_causal_component = _get_rai_component("microsoft_azureml_rai_tabular_causal")
rai_counterfactual_component = _get_rai_component("microsoft_azureml_rai_tabular_counterfactual")
rai_erroranalysis_component = _get_rai_component("microsoft_azureml_rai_tabular_erroranalysis")
rai_gather_component = _get_rai_component("microsoft_azureml_rai_tabular_insight_gather")
rai_scorecard_component = _get_rai_component("microsoft_azureml_rai_tabular_score_card")

The current version of RAI built-in components is: 0.14.0


In [13]:
# Base name shared by every model trained in this notebook, plus a suffix taken
# from the current Unix time in whole seconds.
model_name = "Academic_sucess_Bianry_classifier"
model_name_suffix = int(time.time())

# "<name>_<suffix>:1" and its "azureml:"-prefixed form
# (the ":1" is presumably the registered model version - confirm).
expected_model_id = "{}_{}:1".format(model_name, model_name_suffix)
azureml_model_id = "azureml:" + expected_model_id

In [14]:
# Load the locally defined training components from their YAML specs
# (paths are relative to the notebook's working directory).
lgbm_component = load_component(
    source="../components/lgbm/lgbm.yaml",
)
gradient_boosting_classifier_component = load_component(
    source="../components/GradientBoostingClassifier/GradientBoostingClassifier.yaml",
)
decision_tree_classifier_component = load_component(
    source="../components/decision_tree_classifier/decision_tree_classifier.yaml",
)
logistic_regression_component = load_component(
    source="../components/logistic_regression/logistic_regression.yaml",
)

In [15]:
# Column configuration shared by all model branches of the pipeline.
target_feature = "binary_target"
exclude_features = "Gender"
# NOTE: "Gender" is the excluded feature, but it must stay in the categorical
# feature list because the RAI job aborts if it is removed. The training step
# therefore assigns it a constant value in the training set just before the model
# is fitted (this workaround has only been tested with a single excluded variable).
# Each column is listed exactly once ("Daytime_evening_attendance" used to be
# repeated by mistake).
categorical_features = json.dumps(["Marital_status",
                                 "Daytime_evening_attendance",
                                 "Scholarship_holder",
                                 "Displaced",
                                 "Debtor",
                                 "Gender",
                                 "Tuition_fees_up_to_date",
                                 "International",
                                 "Educational_special_needs"])

# Class labels passed to the RAI constructor, JSON-encoded like the column list.
classes_in_target = json.dumps(["Graduated/Still Enrolled", "Dropout"])
#treatment_features = json.dumps(["Marital_status", "Gender",])

# Registered MLTable data assets (both pinned to version 17), downloaded to the
# compute before each step runs.
train_mltable = Input(
    path="azureml:academic_sucess_train_mltable:17",
    type="mltable",
    mode="download",
)
test_mltable = Input(
    path="azureml:academic_sucess_test_mltable:17",
    type="mltable",
    mode="download",
)

# Pipeline that trains three classifiers (LightGBM, decision tree, logistic
# regression) on the same data and builds one RAI dashboard per model.
#
# Each branch has the same four stages:
#   1. train the model with its training component,
#   2. create the RAI insights object with the RAI constructor component,
#   3. run the error-analysis component on it,
#   4. gather the results into a dashboard whose outputs are uploaded.
#
# Parameters:
#   target_column_name - name of the label column.
#   exclude_features   - feature passed to the training components for exclusion.
#   training_data      - training dataset input.
#   test_data          - test dataset input.
#
# Reads these notebook-level names: model_name, model_name_suffix,
# categorical_features, classes_in_target, compute_name and the loaded components.
# Returns an empty dict, i.e. the pipeline declares no outputs of its own.
@dsl.pipeline(
    compute=compute_name,
    description="RAI Academic Sucess Classifier Demo NB",
    experiment_name=f"{model_name}_{model_name_suffix}",
)
def rai_demo_pipeline(target_column_name, exclude_features, training_data, test_data):
    
    ######
    #lgbm#
    ######
    lgbm_step = lgbm_component(
        target_column_name=target_column_name,
        training_data=training_data,
        test_data=test_data,
        exclude_features=exclude_features,
        model_base_name=f"{model_name}_lgbm_{model_name_suffix}"
    )
    lgbm_step.set_limits(timeout=3600)

    # Initiate the RAIInsights
    # NOTE(review): only this branch sets use_model_dependency=True; presumably the
    # LightGBM model needs its own packages when it is loaded - confirm.
    lgbm_create_rai_job = rai_constructor_component(
        title="LGBM RAI Dashboard Example",
        task_type="classification",
        model_info=f"{model_name}_lgbm_{model_name_suffix}:1",
        model_input=lgbm_step.outputs.model_output,
        train_dataset=training_data,
        test_dataset=test_data,
        target_column_name=target_column_name,
        categorical_column_names=categorical_features,
        classes=classes_in_target,
        use_model_dependency=True,
    )
    lgbm_create_rai_job.set_limits(timeout=7200)

    # Add error analysis
    lgbm_erroranalysis_job = rai_erroranalysis_component(
        rai_insights_dashboard=lgbm_create_rai_job.outputs.rai_insights_dashboard,
    )
    lgbm_erroranalysis_job.set_limits(timeout=7200)

    # Combine everything (only the error-analysis insight is wired in; the
    # explanation, causal and counterfactual insights are currently disabled)
    lgbm_rai_gather_job = rai_gather_component(
        constructor=lgbm_create_rai_job.outputs.rai_insights_dashboard,
        #insight_1=explain_job.outputs.explanation,
        #insight_2=causal_job.outputs.causal,
        #insight_3=counterfactual_job.outputs.counterfactual,
        insight_4=lgbm_erroranalysis_job.outputs.error_analysis,
    )
    lgbm_rai_gather_job.set_limits(timeout=7200)

    lgbm_rai_gather_job.outputs.dashboard.mode = "upload"
    lgbm_rai_gather_job.outputs.ux_json.mode = "upload"

    ####################################
    #decision_tree_classifier_component#
    ####################################
    tree_step = decision_tree_classifier_component(
        target_column_name=target_column_name,
        training_data=training_data,
        test_data=test_data,
        exclude_features=exclude_features,
        model_base_name=f"{model_name}_tree_{model_name_suffix}"
    )
    tree_step.set_limits(timeout=3600)

    # Initiate the RAIInsights
    tree_create_rai_job = rai_constructor_component(
        title="Tree RAI Dashboard Example",
        task_type="classification",
        model_info=f"{model_name}_tree_{model_name_suffix}:1",
        model_input=tree_step.outputs.model_output,
        train_dataset=training_data,
        test_dataset=test_data,
        target_column_name=target_column_name,
        categorical_column_names=categorical_features,
        classes=classes_in_target,
        use_model_dependency=False,
    )
    tree_create_rai_job.set_limits(timeout=7200)

    # Add error analysis
    tree_erroranalysis_job = rai_erroranalysis_component(
        rai_insights_dashboard=tree_create_rai_job.outputs.rai_insights_dashboard,
    )
    tree_erroranalysis_job.set_limits(timeout=7200)

    # Combine everything
    tree_rai_gather_job = rai_gather_component(
        constructor=tree_create_rai_job.outputs.rai_insights_dashboard,
        #insight_1=explain_job.outputs.explanation,
        #insight_2=causal_job.outputs.causal,
        #insight_3=counterfactual_job.outputs.counterfactual,
        insight_4=tree_erroranalysis_job.outputs.error_analysis,
    )
    tree_rai_gather_job.set_limits(timeout=7200)

    tree_rai_gather_job.outputs.dashboard.mode = "upload"
    tree_rai_gather_job.outputs.ux_json.mode = "upload"

    ####################################
    # logistic_regression_component    #
    ####################################
    logistic_regression_step = logistic_regression_component(
        target_column_name=target_column_name,
        training_data=training_data,
        test_data=test_data,
        exclude_features=exclude_features,
        model_base_name=f"{model_name}_logistic_regression_{model_name_suffix}"
    )
    # Apply the training timeout to this step (it was previously set on tree_step
    # a second time, leaving the logistic-regression step without a limit).
    logistic_regression_step.set_limits(timeout=3600)

    # Initiate the RAIInsights
    logistic_regression_create_rai_job = rai_constructor_component(
        title="logistic_regression RAI Dashboard Example",
        task_type="classification",
        model_info=f"{model_name}_logistic_regression_{model_name_suffix}:1",
        model_input=logistic_regression_step.outputs.model_output,
        train_dataset=training_data,
        test_dataset=test_data,
        target_column_name=target_column_name,
        categorical_column_names=categorical_features,
        classes=classes_in_target,
        use_model_dependency=False,
    )
    logistic_regression_create_rai_job.set_limits(timeout=7200)

    # Add error analysis
    logistic_regression_erroranalysis_job = rai_erroranalysis_component(
        rai_insights_dashboard=logistic_regression_create_rai_job.outputs.rai_insights_dashboard,
    )
    logistic_regression_erroranalysis_job.set_limits(timeout=7200)

    # Combine everything
    logistic_regression_rai_gather_job = rai_gather_component(
        constructor=logistic_regression_create_rai_job.outputs.rai_insights_dashboard,
        #insight_1=explain_job.outputs.explanation,
        #insight_2=causal_job.outputs.causal,
        #insight_3=counterfactual_job.outputs.counterfactual,
        insight_4=logistic_regression_erroranalysis_job.outputs.error_analysis,
    )
    logistic_regression_rai_gather_job.set_limits(timeout=7200)

    logistic_regression_rai_gather_job.outputs.dashboard.mode = "upload"
    logistic_regression_rai_gather_job.outputs.ux_json.mode = "upload"

    return {}

# Instantiate the pipeline with the label column, the excluded feature and the
# train/test MLTable inputs.
rai_demo_pipeline_job = rai_demo_pipeline(target_feature, exclude_features, train_mltable, test_mltable)
# Request a full recomputation of every step. `force_rerun` is the setting name
# documented for PipelineJobSettings; the earlier `ForceRerun` spelling is not a
# documented setting name.
rai_demo_pipeline_job.settings.force_rerun = True

In [16]:
# Submit the pipeline job through the workspace client.
go_job = ml_client.jobs.create_or_update(rai_demo_pipeline_job)
# Bare last expression so the notebook renders the returned job
# (experiment, name, status and a link to the studio page).
go_job

[32mUploading logistic_regression (0.01 MBs): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6018/6018 [00:00<00:00, 46423.30i

Experiment,Name,Type,Status,Details Page
Academic_sucess_Bianry_classifier_1717098294,olden_picture_sbk6v16y1m,pipeline,NotStarted,Link to Azure Machine Learning studio
