In [31]:
!pip install azureml
!pip install azureml.core
!pip install azureml.widgets
!pip install azureml.pipeline





In [32]:
import os
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig, Dataset, Run
from azureml.widgets import RunDetails
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep

In [33]:
ws = Workspace.from_config()

In [34]:
keyvault = ws.get_default_keyvault()

In [35]:
default_datastore = ws.get_default_datastore()

In [36]:
dataset_trials_landscape_name = 'trials-landscape'
dataset_trials_landscape = Dataset.get_by_name(workspace=ws, name=dataset_trials_landscape_name)

### Create CPU Compute

In [37]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# CPU cluster name and sku
aml_cpu_compute_cluster_name = 'cpucluster-d4-v3'
vm_size = 'STANDARD_D4_V3'

# Check cluster does not exist already
try:
    aml_cpu_compute = ComputeTarget(workspace=ws, name=aml_cpu_compute_cluster_name)
    print('Found existing cluster {}'.format(aml_cpu_compute_cluster_name))
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                           max_nodes=4)
    aml_cpu_compute = ComputeTarget.create(ws, aml_cpu_compute_cluster_name, compute_config)

aml_cpu_compute.wait_for_completion(show_output=True)

Found existing cluster cpucluster-d4-v3
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


### Define Runtime Configuration

In [38]:
# https://docs.microsoft.com/en-us/azure/container-registry/container-registry-auth-service-principal

from azureml.core.runconfig import RunConfiguration
aml_run_config = RunConfiguration()

from azureml.core.environment import Environment
# Create the environment
rp_env = Environment(name='rp')
# Enable Docker and reference an image
rp_env.docker.enabled = True
rp_env.python.user_managed_dependencies=True

rp_env.docker.base_image_registry.address = "acieurfrcaassacr.azurecr.io"
rp_env.docker.base_image_registry.username = keyvault.get_secret(name='acieurfrcaassacr-admin-user')
rp_env.docker.base_image_registry.password = keyvault.get_secret(name='acieurfrcaassacr-admin-pwd')
rp_env.docker.base_image = "acieurfrcaassacr.azurecr.io/azureml-env-base-research-platform:latest"
                          
aml_run_config.environment = rp_env
rp_env.register(workspace=ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "acieurfrcaassacr.azurecr.io/azureml-env-base-research-platform:latest",
        "baseImageRegistry": {
            "address": "acieurfrcaassacr.azurecr.io",
            "password": "AzureMlSecret=Env_796e80c7e5bf8a48cb603f229eeec578ec72443dbfe37710cc80de67228c6713_1#EnvironmentDefinition#ContainerRegistry#Password",
            "registryIdentity": null,
            "username": "acieurfrcaassacr"
        },
        "enabled": true,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "rp",
    

## Specify script source folder(s)

In [39]:
dataprep_script_folder = '../'
print('Source directory for data preparation is {}.'.format(os.path.realpath(dataprep_script_folder)))

Source directory for data preparation is D:\My Developments\research-platform\code\dataprep\trials-landscape.


### Step - Database Update

In [40]:
# Database update
step_database_update = PythonScriptStep(
    name='Database Update',
    script_name='step_database_update.py', 
    arguments=['--input', dataset_trials_landscape.as_named_input('raw_json_data').as_mount()],
    compute_target=aml_cpu_compute,
    runconfig=aml_run_config,
    source_directory=dataprep_script_folder,
    allow_reuse=True
)

print('Step Database Update created')

Step Database Update created


### Create Step List

In [41]:
steps = [step_database_update]
print("Step lists created")

Step lists created


In [42]:
aml_pipeline = Pipeline(workspace=ws, steps=steps)
print ("Pipeline is built")

Pipeline is built


In [43]:
aml_pipeline.validate()
print("Pipeline validation complete")

Step Database Update is ready to be created [a873b3ec]
Pipeline validation complete


In [44]:
# Note regenerate output, decides whether to execute the module to regenerate output at running time.
# It's by default unselected, which means if the module has been executed with the same parameters previously, 
# the system will reuse the output from last run to reduce run time.

aml_pipeline_run = Experiment(ws, 'pipeline-trials-landscape').submit(aml_pipeline, regenerate_outputs=True)
print("Experiment pipeline trials-landscape is submitted for execution")

Created step Database Update [a873b3ec][dce70199-4a3f-4942-8f17-1e250ebc302a], (This step will run and generate new outputs)
Submitted PipelineRun 2d98e5a0-fa28-410d-8f1d-845893b5364e
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/pipeline-trials-landscape/runs/2d98e5a0-fa28-410d-8f1d-845893b5364e?wsid=/subscriptions/19518d47-0c8b-4829-a602-c5ced78deb3f/resourcegroups/aci-eur-frc-aa-ss-rg/workspaces/aci-eur-frc-aa-ss-mlw
Experiment pipeline trials-landscape is submitted for execution


In [45]:
RunDetails(aml_pipeline_run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [46]:
step_runs = pipeline_run1.get_children()
for step_run in step_runs:
    status = step_run.get_status()
    print('Script:', step_run.name, 'status:', status)
    
    # Change this if you want to see details even if the Step has succeeded.
    if status == "Failed":
        joblog = step_run.get_job_log()
        print('job log:', joblog)

NameError: name 'pipeline_run1' is not defined