# Deploy tune limits pipeline


# Install requirements

In [1]:
import sys
!{sys.executable} -m pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0


### Init workspace

In [2]:
from dotenv import dotenv_values
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
import os

env_vars = dotenv_values("env")
for key, value in env_vars.items():
     os.environ[key] = value
subscription_id = os.getenv("SUBSCRIPTION_ID")
workspace_name = os.getenv("WORKSPACE_NAME")
version = os.getenv("VERSION")
print(f"workspace_name={workspace_name}")
print(f"version={version}")

# authenticate
credential = DefaultAzureCredential()

# Get a handle to the workspace
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name="DocuBank",
    workspace_name=workspace_name,
)
cpu_cluster = None

workspace_name=bankmap
version=0.0.5


### Create a compute resource to run your pipeline (Optional)


In [3]:
from azure.ai.ml.entities import AmlCompute

# Name assigned to the compute cluster
cpu_compute_target = "cpu-cluster-tune-lp"

try:
    # let's see if the compute target already exists
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
    print(
        f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
    )

except Exception:
    print("Creating a new cpu compute target...")

    # Let's create the Azure Machine Learning compute object with the intended parameters
    # if you run into an out of quota error, change the size to a comparable VM that is available.
    # Learn more on https://azure.microsoft.com/en-us/pricing/details/machine-learning/.
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        # Azure Machine Learning Compute is the on-demand VM service
        type="amlcompute",
        # VM Family
        size="STANDARD_DS3_V2",
        # Minimum running nodes when there is no job running
        min_instances=0,
        # Nodes in cluster
        max_instances=1,
        # How many seconds will the node running after the job termination
        idle_time_before_scale_down=600,
        # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination
        tier="LowPriority",
    )
    print(
        f"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}"
    )
    # Now, we pass the object to MLClient's create_or_update method
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster)

Creating a new cpu compute target...
AMLCompute with name cpu-cluster-lp will be created, with compute size STANDARD_DS3_V2


### Set Params

In [11]:
conda_file = "./conda.yaml"
code_dir = "./src"
tune_env_name = "bankmap-tune"
component_name = "bankmap_tune"

output_path=f"azureml://subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/{workspace_name}/datastores/configs/paths/"
input_path=f"azureml://subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/{workspace_name}/datastores/datacopy/paths/"

### Create environment


In [5]:
from azure.ai.ml.entities import Environment

pipeline_job_env = Environment(
    name=tune_env_name,
    description="Env to run tune function",
    tags={"bankmap-tune": version},
    conda_file=conda_file,
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    version=version,
)
pipeline_job_env = ml_client.environments.create_or_update(pipeline_job_env)

print(
    f"Environment with name {pipeline_job_env.name} is registered to workspace, the environment version is {pipeline_job_env.version}"
)

Environment with name bankmap-tune is registered to workspace, the environment version is 0.0.5


### Create component


In [12]:
from azure.ai.ml import command
from azure.ai.ml import Input, Output

outputs={"output_path": Output(type="uri_folder", mode="rw_mount", path=output_path)}
print(f"outputs {outputs}")
print(f"outputs {outputs['output_path'].path}")
tune_component = command(
    name=component_name,
    display_name="Tune bankmap params for a company",
    description="Tune limits for bankmap company",
    inputs={
        "data":  Input(type="uri_file"),
        "company": Input(type="string")
    },
    is_deterministic=False, 
    outputs=outputs,
    # The source folder of the component
    code=code_dir,
    version=version,
    command="""python tune.py \
            --company '${{inputs.company}}' \
            --input_file '${{inputs.data}}'\
            --output_path '${{outputs.output_path}}'\
            """,
    environment=f"{pipeline_job_env.name}:{pipeline_job_env.version}",
)

outputs {'output_path': {'type': 'uri_folder', 'path': 'azureml://subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/bankmap/datastores/configs/paths/', 'mode': 'rw_mount'}}
outputs azureml://subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/bankmap/datastores/configs/paths/


### Register the component in the workspace for future reuse.


In [13]:
# Now we register the component to the workspace
tune_component = ml_client.create_or_update(tune_component.component)

# Create (register) the component in your workspace
print(
    f"Component {tune_component.name} with Version {tune_component.version} is registered"
)

Component bankmap_tune with Version 0.0.5 is registered


### Optional: Test component



In [14]:
# the dsl decorator tells the sdk that we are defining an Azure Machine Learning pipeline
from azure.ai.ml import dsl, Input, Output

tune_component = ml_client.components.get(component_name)
print(f'output: {tune_component}')

@dsl.pipeline(
    compute=cpu_compute_target,
    description="tune pipeline"
)
def tune_pipeline(company, data):
    tune_job = tune_component(company=company, data=data)
    tune_job.outputs["output_path"] = Output(type="uri_folder", mode="rw_mount", path=output_path)
    # a pipeline returns a dictionary of outputs
    # keys will code for the pipeline output identifier
    print(f'output: {tune_job.outputs["output_path"]}')
    return {
        "output_path": tune_job.outputs.output_path
    }

output: name: bankmap_tune
version: 0.0.5
display_name: Tune bankmap params for a company
description: Tune limits for bankmap company
type: command
inputs:
  data:
    type: uri_file
    optional: false
  company:
    type: string
    optional: false
outputs:
  output_path:
    type: uri_folder
command: 'python tune.py             --company ''${{inputs.company}}''             --input_file
  ''${{inputs.data}}''            --output_path ''${{outputs.output_path}}''            '
environment: azureml:/subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourceGroups/DocuBank/providers/Microsoft.MachineLearningServices/workspaces/bankmap/environments/bankmap-tune/versions/0.0.5
code: azureml:/subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourceGroups/DocuBank/providers/Microsoft.MachineLearningServices/workspaces/bankmap/codes/4e43985f-f456-4328-ad10-9be4cdd6b08e/versions/1
resources:
  instance_count: 1
creation_context:
  created_at: '2023-10-25T11:46:22.304560+00:00'
  created_b

### Run job

In [15]:
# Let's instantiate the pipeline with the parameters of our choice
company=""
pipeline = tune_pipeline(
    company=company,
    data=Input(type="uri_file", path=f"azureml://subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/{workspace_name}/datastores/datacopy/paths/{company}.zip")
)

pipeline_job = ml_client.jobs.create_or_update(
    pipeline,
    experiment_name="Tune",
)
print(f'output before: {pipeline_job.outputs.output_path.path}')
pipeline_job.outputs.output_path = Output(type="uri_folder", mode="rw_mount", path=output_path)
print(f'output: {pipeline_job.outputs.output_path.path}')
print(f'output: {pipeline_job.name}')

output: ${{parent.jobs.None.outputs.output_path}}
output before: azureml://subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/bankmap/datastores/configs/paths/
output: azureml://subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/bankmap/datastores/configs/paths/
output: quiet_snake_5m964my6dp


In [16]:
ml_client.jobs.stream(pipeline_job.name)

RunId: quiet_snake_5m964my6dp
Web View: https://ml.azure.com/runs/quiet_snake_5m964my6dp?wsid=/subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/bankmap

Streaming logs/azureml/executionlogs.txt

[2023-10-25 12:03:17Z] Submitting 1 runs, first five are: 61d33c48:863417c1-7a66-405e-9e9f-1299773b59c5
[2023-10-25 12:03:55Z] Completing processing run id 863417c1-7a66-405e-9e9f-1299773b59c5.

Execution Summary
RunId: quiet_snake_5m964my6dp
Web View: https://ml.azure.com/runs/quiet_snake_5m964my6dp?wsid=/subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/bankmap



In [91]:
output = ml_client.jobs.download(name=pipeline_job.name, download_path="./res", output_name="output_path")

Downloading artifact azureml://subscriptions/ae0eff97-7885-4c1e-b23c-d8a627ef292f/resourcegroups/DocuBank/workspaces/test/datastores/workspaceblobstore/paths/azureml/fb6d3150-130e-46d2-b563-a9b0ceb73d5a/output_path/ to res/named-outputs/output_path
