# Redemption model training pipeline

In [None]:
# Make the parent (base) directory importable, so modules resolve as if this
# notebook were run from the base directory rather than the notebook directory.
import os
import sys

module_path = os.path.abspath("..")
if module_path not in sys.path:
    # Only register the directory once, even if the cell is re-run.
    sys.path.append(module_path)
    
import azureml
from azureml.core import Workspace, Experiment, Dataset, RunConfiguration, Datastore, Environment
from azureml.pipeline.core import Pipeline, PipelineParameter
from azureml.pipeline.steps import PythonScriptStep
from azureml.data.dataset_consumption_config import DatasetConsumptionConfig
from azureml.data.datapath import DataPath
from azureml.widgets import RunDetails
    
from aml_setup.create_envs import create_env

# Print the installed Azure ML SDK version so it is recorded in the notebook output.
print("Azure ML SDK version:", azureml.core.VERSION)

In [None]:
# Load the Azure ML workspace from its saved configuration, then echo the
# identifying details so it is obvious which workspace the notebook targets.
ws = Workspace.from_config()

workspace_summary = f"""
WS name: {ws.name}
Region: {ws.location}
Subscription id: {ws.subscription_id}
Resource group: {ws.resource_group}
"""
print(workspace_summary)

## Notebook parameters

In [None]:
# --- Run switches -----------------------------------------------------------
# Submit a test run of the pipeline for debugging?
debug = True
# Publish the pipeline for future use?
publish = True

# --- Environment ------------------------------------------------------------
# Path to the wheel to add to the AML environment; set to None if no wheel is needed.
wheel_path = "./dist/aml_setup-0.0.1-py3-none-any.whl"

# --- Naming -----------------------------------------------------------------
pipeline_name = "AML-Setup-pipeline"
pipeline_desc = "Pipeline to test AML Setup"
experiment_name = "AML-Setup-experiment"
datastore_name_ = "XXXXXXXXXXXXXXXXXXXXXXXX"

# Echo the effective settings. The empty entries reproduce the blank lines of
# the report, and the space after the wheel path is part of the printed output.
_settings_report = [
    "",
    f"debug: {debug}",
    f"publish: {publish}",
    "",
    f"wheel_path: {wheel_path} ",
    "",
    f"pipeline_name: {pipeline_name}",
    f"pipeline_desc: {pipeline_desc}",
    f"experiment_name: {experiment_name}",
    f"datastore_name: {datastore_name_}",
    "",
    "",
]
print("\n".join(_settings_report))

## Pipeline parameters

These are parameters that can be passed into the pipeline from Azure Data Factory (ADF).

In [None]:
# Datastore name exposed as a pipeline parameter, defaulting to the notebook setting.
# NOTE(review): the Datastore object below is built from `datastore_name_` (the plain
# string), not from this parameter — confirm whether overriding it at run time is
# expected to have any effect.
datastore_name = PipelineParameter(name="datastore_name", default_value=datastore_name_)

# Default dataset for the `test_data_path` parameter. The path string reads as a
# reminder that a real data path must be supplied; validation is switched off so
# the placeholder can be constructed.
datastore = Datastore(workspace=ws, name=datastore_name_)
placeholder_path = DataPath(datastore, "error_you_need_to_pass_a_data_path")
default_ds = Dataset.File.from_files(path=placeholder_path, validate=False)

# Dataset parameter plus a mounted consumption config wrapping it.
# NOTE(review): the step defined in this notebook declares `inputs=[]`, so this
# consumption config does not appear to be wired into any step yet.
test_data_path = PipelineParameter(name="test_data_path", default_value=default_ds)
consumption_config = DatasetConsumptionConfig("test_data_consumption", test_data_path)
batch_dataset_consumption = consumption_config.as_mount()

# Plain string parameter forwarded to the test script as `--test_arg`.
test_arg = PipelineParameter(name="test_arg", default_value="test_string")

## Create Environment

In [None]:
# Options for the project helper that creates the environment directory.
# `wheel_path` comes from the notebook parameters and may be None.
env_dir_options = {
    "env_out_dir": "./envs/",
    "version": "0.0.1",
    "overwrite": True,
    "wheel_path": wheel_path,
}

# The returned value is later passed to Environment.load_from_directory,
# so it is presumably the path of the generated environment directory.
environment_dir_path = create_env.create_env_dir(ws, **env_dir_options)


## Define run steps

In [None]:
# Run configuration whose environment is the one loaded from the generated directory.
runconfig = RunConfiguration()
env = Environment.load_from_directory(environment_dir_path)
runconfig.environment = env

# Command-line arguments for the script; `test_arg` is a PipelineParameter, so its
# value is resolved when the pipeline is submitted.
step_arguments = ["--test_arg", test_arg]

# Single step that runs the test script with the run configuration above.
# Reuse is disabled, so results of earlier runs are not reused for this step.
test_env_step = PythonScriptStep(
    script_name="scripts/test_run.py",
    name="test-env-run",
    source_directory="./",
    arguments=step_arguments,
    inputs=[],
    runconfig=runconfig,
    compute_target="Standard-D8s-v3",
    allow_reuse=False,
)

In [None]:
# List of steps handed to the Pipeline constructor; currently just the single test step.
steps = [test_env_step]

## Create pipeline object and validate 

In [None]:
# Assemble the pipeline from the collected steps and validate it before submitting.
# `validate()` is left as the final expression so the notebook displays its result.
pipeline = Pipeline(steps=steps, workspace=ws)
pipeline.validate()

## Submit the pipeline against an experiment

In [None]:
# Debug-only value for `test_arg`: a list written out as a single string.
_test_arg = '["This", "is", "a", "test", "list"]'

if debug:
    # Values given here override the PipelineParameter defaults for this run only.
    pipeline_parameters = {"test_arg": _test_arg}

    experiment = Experiment(ws, experiment_name)
    pipeline_run = experiment.submit(pipeline, pipeline_parameters=pipeline_parameters)

    # Show the run widget, then wait for the run to finish while streaming its output.
    RunDetails(pipeline_run).show()
    pipeline_run.wait_for_completion(show_output=True)

## Publish Pipeline 

In [None]:
# Publish the pipeline under the configured name and description when the
# `publish` notebook switch is on, then print the returned published-pipeline
# object so it is recorded in the notebook output.
# (Fix: removed the stray comma after the colon, which was a SyntaxError and
# stopped this cell from running.)
if publish:
    pp = pipeline.publish(name=pipeline_name, description=pipeline_desc)
    print(pp)