In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
%%time
import datetime as dt
import os
import pickle
import time
import warnings

from dask import compute, delayed
import datarobot as dr
import numpy as np
import pandas as pd
import pulumi
import pulumi_datarobot as datarobot
from pulumi import automation as auto

# When the DATAROBOT_NOTEBOOK_IMAGE variable is unset or empty, pull environment
# variables from ../.env (overriding any that are already set). python-dotenv is
# imported lazily because only this branch needs it.
running_outside_datarobot = not os.getenv("DATAROBOT_NOTEBOOK_IMAGE")
if running_outside_datarobot:
    print("not running in DataRobot Notebook")
    from dotenv import load_dotenv

    load_dotenv("../.env", override=True)

# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

# No explicit credentials are passed; the client configures itself.
client = dr.Client()

# Widen pandas' display limits so large frames are shown without truncation.
for option_name, option_value in (
    ("display.max_rows", 1000),
    ("display.max_columns", 1000),
    ("display.width", 1000),
    ("display.max_colwidth", 1000),
    ("display.precision", 8),
):
    pd.set_option(option_name, option_value)

In [None]:
!ls ../data

In [None]:
%%time
# Model-building configuration
input_path = "../data/"  # directory prefix; joined to file names by plain string concatenation, so keep the trailing slash
file_name = "opt_steel_strength.csv"  # input CSV read from input_path
targets = ["降伏強度", "引張強度"]  # target columns (yield strength, tensile strength); one project is trained per entry
group_col = False  # False -> random CV; otherwise the column name used as the group-partition key

In [None]:
%%time


def __run_autopilot(target, df, group_col):
    """Create a DataRobot project for one target and start modeling in quick mode.

    Args:
        target: Name of the target column in ``df``.
        df: Training data uploaded as the project's dataset.
        group_col: Column name to use as the group-partitioning key, or a
            falsy value (``False``, ``None``, ``""``) to use random
            cross-validation instead.

    Returns:
        The ``dr.Project`` that was created for ``target``.
    """
    project = dr.Project.create(df, project_name=f"opt-{target}")

    # Truthiness test rather than `!= False`: with the old comparison a value
    # of None or "" was treated as a real column and forwarded to GroupCV.
    if group_col:
        partitioning = dr.GroupCV(
            holdout_pct=0, partition_key_cols=[group_col], reps=5
        )
    else:
        partitioning = dr.RandomCV(holdout_pct=0, reps=5)

    project.analyze_and_model(
        worker_count=-1,  # -1: per the DataRobot client docs, request the maximum available workers
        target=target,
        mode="quick",
        partitioning_method=partitioning,
        advanced_options=dr.AdvancedOptions(),
        max_wait=36000,
    )

    return project


def __wait_for_cv(projects):
    """Wait for the jobs currently listed on each project to complete.

    Progress is reported per project using the matching entry of the
    module-level ``targets`` list, so ``projects`` is expected to be in the
    same order as ``targets``. A failure on one project is reported and does
    not stop the remaining projects from being waited on.

    Args:
        projects: Sequence of project objects exposing an ``id`` attribute.
    """
    for i, handle in enumerate(projects):
        # Previously the project was looked up with index i - 1 while the
        # message used index i, so each message named the wrong target.
        label = targets[i] if i < len(targets) else str(handle.id)
        try:
            project = dr.Project.get(project_id=handle.id)
            for job in project.get_all_jobs():
                job.wait_for_completion(max_wait=60000)
            print("Project " + label + " completed running autopilot")
        except Exception as exc:
            # `Exception` (not a bare except) so that interrupting the kernel
            # during a long wait still works; the cause is shown, not hidden.
            print("Project " + label + " occured error: " + repr(exc))


# Load the source data and write a copy of it (targets included) as feature.csv.
df = pd.read_csv(input_path + file_name)
df.to_csv(input_path + "feature.csv", index=False)
df_feature = df.drop(targets, axis=1)

# One delayed project per target: all feature columns plus that single target.
delayed_dr_projects = []
for target in targets:
    training_frame = df_feature.copy()
    training_frame[target] = df[target]
    delayed_dr_projects.append(
        delayed(__run_autopilot)(target, training_frame, group_col)
    )

projects = compute(delayed_dr_projects)[0]

__wait_for_cv(projects)
# __wait_for_cv only waits on the jobs listed at the moment it is called, so
# also block until Autopilot itself reports completion before picking the top
# model (the quantile cell below already does this).
for project in projects:
    project.wait_for_autopilot(check_interval=30)

models = [project.get_top_model() for project in projects]
for model in models:
    print(f"Project {model.project_id}, Top model: {model.id}, {model.model_type}")
model_ids = [model.id for model in models]

In [None]:
%%time
# create quantile project

def __run_autopilot(target, df, group_col, quantile):
    """Create a project and start quick-mode modeling with the Quantile Loss metric.

    The modeling settings are sent directly to the project's ``aim`` endpoint
    through the REST client rather than through ``analyze_and_model``.

    Args:
        target: Name of the target column in ``df``.
        df: Training data uploaded as the project's dataset.
        group_col: Accepted but not used; the payload always requests
            ``"cvMethod": "random"``.
        quantile: Value sent as ``quantileLevel`` and appended to the
            project name.

    Returns:
        The ``dr.Project`` that was created.
    """
    rest_client = dr.Client()
    project = dr.Project.create(df, project_name=f"opt-{target}-{quantile}")

    aim_settings = {
        "target": target,
        "mode": "quick",
        "targetType": "Regression",
        "cvMethod": "random",
        "holdoutPct": 20,
        "validationType": "CV",
        "reps": 5,
        "metric": "Quantile Loss",
        "blendBestModels": False,
        "prepareModelForDeployment": True,
        "quantileLevel": quantile,
    }
    aim_response = rest_client.patch(f"projects/{project.id}/aim/", json=aim_settings)
    # The request is expected to be accepted asynchronously (HTTP 202).
    assert aim_response.status_code == 202

    project.set_worker_count(-1)
    return project


def __wait_for_cv(projects):
    """Wait for the jobs currently listed on each project to complete.

    Each project is reported under its own project name (falling back to its
    id). A failure on one project is reported and does not stop the remaining
    projects from being waited on.

    Args:
        projects: Sequence of project objects exposing an ``id`` attribute.
    """
    for handle in projects:
        # Previously the message used targets[i] while the project was looked
        # up with index i - 1; for the quantile projects that label was also
        # the wrong thing entirely (the second quantile was reported under the
        # second target's name). The project's own name is unambiguous.
        label = getattr(handle, "project_name", None) or str(handle.id)
        try:
            project = dr.Project.get(project_id=handle.id)
            for job in project.get_all_jobs():
                job.wait_for_completion(max_wait=60000)
            print("Project " + label + " completed running autopilot")
        except Exception as exc:
            # `Exception` (not a bare except) so that interrupting the kernel
            # during a long wait still works; the cause is shown, not hidden.
            print("Project " + label + " occured error: " + repr(exc))

# Reload the source data and rewrite the feature.csv copy next to it.
df = pd.read_csv(input_path + file_name)
df.to_csv(input_path + "feature.csv", index=False)
df_feature = df.drop(columns=targets)

# One delayed quantile project per level, all for the same target column.
quantile_target = "降伏強度"
delayed_dr_projects = [
    delayed(__run_autopilot)(
        quantile_target,
        # New frame: every feature column plus the target appended last.
        df_feature.assign(**{quantile_target: df[quantile_target]}),
        group_col,
        level,
    )
    for level in (0.25, 0.75)
]

projects = compute(delayed_dr_projects)[0]

__wait_for_cv(projects)
for project in projects:
    project.wait_for_autopilot(check_interval=30)

models = [project.get_top_model() for project in projects]
model_quant_ids = []
for top_model in models:
    print(f"Project {top_model.project_id}, Top model: {top_model.id}, {top_model.model_type}")
    model_quant_ids.append(top_model.id)

In [None]:
# Deployment configuration read by make_deployment and stack_up.
usecase_id = os.getenv("DATAROBOT_DEFAULT_USE_CASE")  # None when the variable is not set
# NOTE(review): the two literal lists below overwrite the model_ids /
# model_quant_ids computed by the training cells above — confirm these IDs are
# the ones that should be deployed.
model_ids = ["67bb463d7bb3e096b730cbf9", "67bb46747b6a561d75815d5e"]  # index 0 / 1 are deployed for targets[0] / targets[1]
model_quant_ids = ["67bc656ba74dd0c677da05c4", "67bc660f4f5474ec75a7cc37"]  # index 0 -> quantile 0.25, index 1 -> quantile 0.75 of targets[0]
stack_name = "opt-steel-strength"  # Pulumi stack name passed to stack_up
project_name = "dr-workshop"  # Pulumi project name passed to stack_up

In [None]:
def stack_up(project_name: str, stack_name: str, program: auto.PulumiFn) -> auto.Stack:
    """Create or select a stack for an inline program, refresh it, and deploy it.

    Args:
        project_name: Pulumi project the stack belongs to.
        stack_name: Name of the stack to create, or to select if it exists.
        program: Zero-argument inline Pulumi program that declares the
            resources (annotated with the Automation API's ``PulumiFn`` alias
            instead of the builtin ``callable`` function, which is not a type).

    Returns:
        The ``auto.Stack`` after ``refresh`` and ``up`` have been run on it.
    """
    # Create (or select, if one already exists) a stack that uses the inline program.
    stack = auto.create_or_select_stack(
        stack_name=stack_name, project_name=project_name, program=program
    )

    # Refresh before deploying; both steps stream their engine output to stdout.
    stack.refresh(on_output=print)
    stack.up(on_output=print)
    return stack


def destroy_project(stack: auto.Stack):
    """Destroy the stack's resources, then remove the stack from its workspace.

    Args:
        stack: The Pulumi stack to tear down.
    """
    # Read the name up front; it is needed for the removal call and the message.
    removed_name = stack.name

    stack.destroy(on_output=print)
    stack.workspace.remove_stack(removed_name)

    print(f"stack {removed_name} in project removed")


def _register_leaderboard_model(label, model_id):
    """Register one leaderboard model as ``[opt]-registered-model-<label>``.

    Args:
        label: Suffix used for both the Pulumi resource name and the
            registered model's name.
        model_id: ID of the leaderboard model to register.

    Returns:
        The ``datarobot.RegisteredModelFromLeaderboard`` resource.
    """
    full_name = f"[opt]-registered-model-{label}"
    return datarobot.RegisteredModelFromLeaderboard(
        resource_name=full_name,
        model_id=model_id,
        name=full_name,
        use_case_ids=[usecase_id],
    )


def _create_deployment(label, registered_model_version_id, prediction_environment_id):
    """Declare one deployment named ``[opt]-deployment-<label>``.

    Every deployment uses the same settings: drift tracking and batch
    monitoring disabled, prediction data collection and segment analysis
    enabled, and an optional ``association_id`` column.

    Args:
        label: Suffix used for both the Pulumi resource name and the
            deployment label.
        registered_model_version_id: Version of the registered model to deploy.
        prediction_environment_id: Prediction environment to deploy into.

    Returns:
        The ``datarobot.Deployment`` resource.
    """
    full_name = f"[opt]-deployment-{label}"
    return datarobot.Deployment(
        resource_name=full_name,
        label=full_name,
        registered_model_version_id=registered_model_version_id,
        prediction_environment_id=prediction_environment_id,
        drift_tracking_settings={
            "feature_drift_enabled": False,
            "target_drift_enabled": False,
        },
        association_id_settings={
            "auto_generate_id": False,
            "column_names": ["association_id"],
            "required_in_prediction_requests": False,
        },
        predictions_data_collection_settings={
            "enabled": True,
        },
        batch_monitoring_settings={
            "enabled": False,
        },
        segment_analysis_settings={
            "enabled": True,
            "attributes": [],
        },
        use_case_ids=[usecase_id],
    )


def make_deployment():
    """Deploy a trained model onto DataRobot.

    Inline Pulumi program. It registers four leaderboard models (the models
    for ``targets[0]`` and ``targets[1]`` from ``model_ids``, plus the 0.25 and
    0.75 quantile models for ``targets[0]`` from ``model_quant_ids``), creates
    one shared prediction environment, deploys each registered model into it,
    and exports the resulting IDs as stack outputs.

    Reads the module-level ``targets``, ``model_ids``, ``model_quant_ids`` and
    ``usecase_id``.
    """
    first_target, second_target = targets[0], targets[1]

    # (resource-name suffix, export-key suffix, leaderboard model id), in the
    # order the resources are declared. The two suffixes differ for quantile
    # models: resource names use "-quantile-0.25", export keys " quantile 0.25".
    model_specs = [
        (first_target, first_target, model_ids[0]),
        (
            f"{first_target}-quantile-0.25",
            f"{first_target} quantile 0.25",
            model_quant_ids[0],
        ),
        (
            f"{first_target}-quantile-0.75",
            f"{first_target} quantile 0.75",
            model_quant_ids[1],
        ),
        (second_target, second_target, model_ids[1]),
    ]

    # ----- registered models -----
    registered = {
        export_label: _register_leaderboard_model(resource_label, model_id)
        for resource_label, export_label, model_id in model_specs
    }

    # ----- prediction environment -----
    prediction_environment = datarobot.PredictionEnvironment(
        resource_name="[opt]-prediction-environment",
        name="[opt]-prediction-environment",
        batch_jobs_max_concurrent=100,
        platform="datarobotServerless",
        supported_model_formats=[
            "datarobot",
            # "customModel",
        ],
    )
    prediction_environment_id = prediction_environment.id

    # ----- deployments -----
    deployments = {
        export_label: _create_deployment(
            resource_label,
            registered[export_label].version_id,
            prediction_environment_id,
        )
        for resource_label, export_label, _model_id in model_specs
    }

    # ----- stack outputs -----
    pulumi.export("prediction_environment_id", prediction_environment_id)
    for _resource_label, export_label, _model_id in model_specs:
        pulumi.export(
            f"registered_model_id for {export_label}", registered[export_label].id
        )
        pulumi.export(
            f"registered_model_version_id for {export_label}",
            registered[export_label].version_id,
        )
        pulumi.export(
            f"deployment_id for {export_label}", deployments[export_label].id
        )

In [None]:
# Create or select the stack, refresh it, and apply make_deployment as the
# inline program; the returned stack handle is used below to read the outputs.
stack = stack_up(project_name, stack_name, program=make_deployment)

In [None]:
# Read the deployment IDs for the two targets back out of the stack outputs.
result = stack.outputs()
deployment_id_1, deployment_id_2 = (
    result[f"deployment_id for {target_name}"].value
    for target_name in (targets[0], targets[1])
)

In [None]:
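# Teardown (intentionally disabled): uncomment the lines below to destroy every
# resource created by make_deployment and remove the stack.
# stack = auto.select_stack(
#     stack_name=stack_name, project_name=project_name, program=make_deployment
# )
# destroy_project(stack)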