In [None]:
import os

import azureml.core
import dotenv
from azureml.core import Experiment, RunConfiguration, ScriptRunConfig, Workspace
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DataReferenceConfiguration
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep
from azureml.widgets import RunDetails

%load_ext dotenv
%dotenv

In [None]:
# Display the installed azureml-core SDK version so the environment is recorded with the notebook.
azureml.core.VERSION

In [None]:
# Connect to the Azure ML workspace described by the local workspace config file.
ws = Workspace.from_config()

In [None]:
# ComputeTarget is needed by the provisioning branch below; without this import
# that branch fails with NameError exactly when the cluster has to be created.
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster to reuse, or to create if it does not exist yet.
aml_compute_target = "cpucluster"
try:
    # Looking the cluster up by name raises ComputeTargetException when it is
    # missing, which routes execution to the provisioning branch.
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("found existing compute target.")
except ComputeTargetException:
    print("creating new compute target")

    # NOTE(review): min_nodes=2 keeps two nodes allocated even when idle —
    # confirm this is intended rather than scaling down to zero.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D3_V2",
        min_nodes=2,
        max_nodes=4,
    )
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    # Block until provisioning finishes (or the 20-minute timeout elapses).
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

print("Aml Compute attached")

In [None]:
# Use the workspace's default datastore for all data exchanged with the runs below.
# NOTE(review): the variable name suggests this is blob storage — confirm for this workspace.
blob_datastore = ws.get_default_datastore()

In [None]:
# Load the previously saved run configuration named "batchspark" from the current directory.
run_config = RunConfiguration.load(name="batchspark", path=".")

In [None]:
# Show which compute target the loaded run configuration points at.
run_config.target

In [None]:
# Reference to the "bsdata" path on the default datastore; used as the input of the pipeline step.
blob_dataref = DataReference(
    datastore=blob_datastore,
    data_reference_name="blob_dataref",
    path_on_datastore="bsdata",
    mode='mount', # mount the datastore path on the compute target (this mode does not download the files)
    overwrite=True
)

In [None]:
# Pipeline step that runs the feature-engineering script with the loaded run
# configuration. The data reference is declared as an input and also handed to
# the script through its --data_dir argument.
feature_engineering_step = PythonScriptStep(
    script_name="2a_feature_engineering.py",
    name="feature_engineering_step",
    runconfig=run_config,
    inputs=[blob_dataref],
    arguments=["--data_dir", blob_dataref],
)

In [None]:
# Assemble a pipeline whose only step is the feature-engineering step.
train_pipeline = Pipeline(ws, steps=[feature_engineering_step])

In [None]:
# Submit the pipeline under the 'pipeline_run' experiment; regenerate_outputs=True
# asks for all step outputs to be regenerated rather than reused from earlier runs.
run = Experiment(ws, 'pipeline_run').submit(train_pipeline, regenerate_outputs=True)

In [None]:
# Show the run-monitoring widget for the pipeline run.
RunDetails(run).show()

In [None]:
# Cancel the pipeline run held in `run`.
# NOTE(review): under "Restart & Run All" this executes right after the pipeline
# is submitted and will abort it — run this cell manually only when needed.
run.cancel()

1_data_ingestion.py

In [None]:
# Run 1_data_ingestion.py as a standalone script run using the loaded run configuration.
# NOTE(review): str(blob_datastore.as_mount()) passes a data-reference placeholder
# string; presumably the "batchspark" run configuration declares the matching
# data reference so it resolves to a mount path on the compute target — confirm.
script_run_config = ScriptRunConfig(
    source_directory=".",
    script="1_data_ingestion.py",
    arguments=["--data_dir", str(blob_datastore.as_mount())],
    run_config=run_config,
)

# Submit under the shared 'script_run' experiment and display the run handle.
run = Experiment(ws, 'script_run').submit(script_run_config)
run

In [None]:
# Show the run-monitoring widget for the data-ingestion run.
RunDetails(run).show()

2a_feature_engineering.py

In [None]:
# Run 2a_feature_engineering.py as a standalone script run using the loaded run
# configuration; the script's own defaults apply for every argument except --data_dir.
# NOTE(review): str(blob_datastore.as_mount()) passes a data-reference placeholder
# string; presumably the "batchspark" run configuration declares the matching
# data reference so it resolves to a mount path on the compute target — confirm.
script_run_config = ScriptRunConfig(
    source_directory=".",
    script="2a_feature_engineering.py",
    arguments=["--data_dir", str(blob_datastore.as_mount())],
    run_config=run_config,
)

# Submit under the shared 'script_run' experiment and display the run handle.
run = Experiment(ws, 'script_run').submit(script_run_config)
run

In [None]:
# Show the run-monitoring widget for the feature-engineering run.
RunDetails(run).show()

2b_model_building.py

In [None]:
# Run 2b_model_building.py as a standalone script run using the loaded run configuration.
# NOTE(review): str(blob_datastore.as_mount()) passes a data-reference placeholder
# string; presumably the "batchspark" run configuration declares the matching
# data reference so it resolves to a mount path on the compute target — confirm.
script_run_config = ScriptRunConfig(
    source_directory=".",
    script="2b_model_building.py",
    arguments=["--data_dir", str(blob_datastore.as_mount())],
    run_config=run_config,
)

# Submit under the shared 'script_run' experiment and display the run handle.
run = Experiment(ws, 'script_run').submit(script_run_config)
run

In [None]:
# Show the run-monitoring widget for the model-building run.
RunDetails(run).show()

2a_feature_engineering.py for scoring

In [None]:
# Re-run 2a_feature_engineering.py to build the scoring feature set: the output
# table name and the date window are overridden through script arguments.
# NOTE(review): str(blob_datastore.as_mount()) passes a data-reference placeholder
# string; presumably the "batchspark" run configuration declares the matching
# data reference so it resolves to a mount path on the compute target — confirm.
script_run_config = ScriptRunConfig(
    source_directory=".",
    script="2a_feature_engineering.py",
    arguments=[
        "--data_dir", str(blob_datastore.as_mount()),
        "--features_table", "scoring_data",
        "--start_date", "2015-11-15",
        "--to_date", "2016-04-30",
    ],
    run_config=run_config,
)

# Submit under the shared 'script_run' experiment and display the run handle.
run = Experiment(ws, 'script_run').submit(script_run_config)
run

In [None]:
# Show the run-monitoring widget for the scoring feature-engineering run.
RunDetails(run).show()

3b_model_scoring.py

In [None]:
# Run 3b_model_scoring.py as a standalone script run using the loaded run configuration.
# NOTE(review): str(blob_datastore.as_mount()) passes a data-reference placeholder
# string; presumably the "batchspark" run configuration declares the matching
# data reference so it resolves to a mount path on the compute target — confirm.
script_run_config = ScriptRunConfig(
    source_directory=".",
    script="3b_model_scoring.py",
    arguments=["--data_dir", str(blob_datastore.as_mount())],
    run_config=run_config,
)

# Submit under the shared 'script_run' experiment and display the run handle.
run = Experiment(ws, 'script_run').submit(script_run_config)
run

In [None]:
# Show the run-monitoring widget for the model-scoring run.
RunDetails(run).show()