In [None]:
import os
import shutil
import warnings

import azureml.core
from azureml.core import Workspace,Experiment,Datastore
from azureml.core import Dataset
from azureml.core.compute import ComputeTarget,AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import Model
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.core import Pipeline,PipelineData
from azureml.pipeline.core import PipelineRun,StepRun,PortDataReference
from azureml.pipeline.steps import PythonScriptStep
from azureml.widgets import RunDetails
# Silence all Python warnings for the rest of the notebook session.
# NOTE(review): this is a blanket filter, so deprecation notices are hidden too.
warnings.filterwarnings('ignore')
# Report the installed Azure ML SDK version (the module is `azureml`, not `axureml`).
print("SDK version: ",azureml.core.VERSION)

In [None]:
# Load the workspace from the config file in the notebook directory.
# The name `ws` is what every later cell refers to.
ws=Workspace.from_config(path='./')
# Default datastore of the workspace; later cells refer to it as `def_blob_store`.
def_blob_store=ws.get_default_datastore()
# Upload the local CSV into the datastore's "data" folder, replacing any previous copy.
def_blob_store.upload_files(['./data/pima-indians-diabetes.csv'],target_path="data",overwrite=True)

In [None]:
# Build a tabular dataset from the CSV uploaded to the datastore's "data" folder.
# The file name must match the uploaded one exactly ('pima-indians-diabetes.csv').
diabetes_data=Dataset.Tabular.from_delimited_files(def_blob_store.path('data/pima-indians-diabetes.csv'))
# create_new_version=True keeps this cell re-runnable: registering the same
# name a second time adds a version instead of raising.
diabetes_data=diabetes_data.register(ws,'diabetes_data',create_new_version=True)

In [None]:
# Reuse the 'demo-cluster' compute target when the workspace already has it;
# otherwise provision a new AmlCompute cluster and wait until it is ready.
aml_compute_target='demo-cluster'
try:
    aml_compute=AmlCompute(ws,aml_compute_target)
    print("found existing compute target")
except ComputeTargetException:
    # Only a compute-target lookup failure should trigger provisioning; any
    # other error (auth, network, KeyboardInterrupt) must surface unchanged.
    print("creting new compute target")
    # The SDK keyword for the VM SKU is `vm_size`.
    provisioning_config=AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                     min_nodes=1,
                                                     max_nodes=4)
    aml_compute=ComputeTarget.create(ws,aml_compute_target,provisioning_config)
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
print("Azure Machine Learning Compute Attached")

In [None]:
# Run configuration shared by the pipeline steps: run on the AML cluster inside
# a Docker base image, with a conda environment built by Azure ML.
aml_run_config=RunConfiguration()
aml_run_config.target=aml_compute
# The environment sections are named `docker` and `python`.
aml_run_config.environment.docker.enabled=True
aml_run_config.environment.docker.base_image='mcr.microsoft.com/azureml/base:latest'
# False => Azure ML creates the Python environment from the dependencies below.
aml_run_config.environment.python.user_managed_dependencies=False
aml_run_config.environment.python.conda_dependencies=CondaDependencies.create(
    conda_packages=['pandas','scikit-learn','numpy'],
    pip_packages=['joblib','azureml-sdk','fusepy'],
    pin_sdk_version=False)


In [None]:
# Named input for the registered dataset; `as_named_input` is a method call.
raw_data=diabetes_data.as_named_input('raw_data')
# Intermediate outputs exchanged between the steps. Each is a PipelineData
# object stored on the default datastore and promoted to a dataset.
train_data=PipelineData('train_data',datastore=def_blob_store).as_dataset()
test_data=PipelineData('test_data',datastore=def_blob_store).as_dataset()
scaler_file=PipelineData('scaler_file',datastore=def_blob_store).as_dataset()
model_file=PipelineData('model_file',datastore=def_blob_store).as_dataset()

In [None]:
# Step 1: run ./prep/prep.py on the cluster. It takes the raw dataset as input
# and declares the train/test splits and the scaler as outputs.
source_directory='./prep'
step1=PythonScriptStep(name='prep_step',
                      script_name='./prep.py',
                      arguments=['--train',train_data,'--test',test_data,'--scaler',scaler_file],
                      inputs=[raw_data],
                      outputs=[train_data,test_data,scaler_file],
                      compute_target=aml_compute,
                      runconfig=aml_run_config,
                      source_directory=source_directory,
                      allow_reuse=True)

In [None]:
# Step 2: run ./train/train.py on the cluster. It consumes the train/test
# splits declared as outputs of the prep step and declares the model as output.
source_directory='./train'
step2=PythonScriptStep(name='train_step',
                      script_name='./train.py',
                      arguments=['--train',train_data,'--test',test_data,'--model',model_file],
                      inputs=[train_data,test_data],
                      outputs=[model_file],
                      compute_target=aml_compute,
                      runconfig=aml_run_config,
                      source_directory=source_directory,
                      allow_reuse=True)

In [None]:
# Steps handed to the pipeline: prep first, then train.
steps = [
    step1,
    step2,
]

In [None]:
# Assemble the pipeline in the workspace from the step list.
pipeline1 = Pipeline(
    workspace=ws,
    steps=steps,
)

In [None]:
# Validate the pipeline graph before submitting; the call's result is shown
# as the cell output.
pipeline1.validate()

In [None]:
# Submit the pipeline under the 'diabetes_run' experiment.
# regenerate_outputs=False is passed through unchanged.
pipeline_run1 = Experiment(ws, 'diabetes_run').submit(
    pipeline1,
    regenerate_outputs=False,
)

In [None]:
# submit() returns before the run is finished, so block until the pipeline
# completes; otherwise the step outputs may not exist yet.
pipeline_run1.wait_for_completion(show_output=True)

# Locate the prep step's run and its 'scaler_file' output.
prep_step=pipeline_run1.find_step_run('prep_step')[0]
step_run_output=prep_step.get_output('scaler_file')

# get_output() returns an object, not a callable: ask it directly for the
# port data reference.
port_data_reference=step_run_output.get_port_data_reference()
# overwrite=True keeps the cell re-runnable after a previous download.
port_data_reference.download(local_path='.',overwrite=True)
# From here on `scaler_file` is the datastore-relative path string of the
# downloaded output (it no longer refers to the pipeline data object).
scaler_file=port_data_reference.path_on_datastore

In [None]:
# Collect the trained model and the fitted scaler into a local ./model folder.

# `model_file` is still the pipeline output object at this point, so the model
# has to be downloaded from the train step before it can be copied.
# wait_for_completion returns straight away when the run has already finished.
pipeline_run1.wait_for_completion(show_output=True)
train_step=pipeline_run1.find_step_run('train_step')[0]
model_reference=train_step.get_output('model_file').get_port_data_reference()
model_reference.download(local_path='.',overwrite=True)

# assumes the download mirrors the datastore-relative path under '.', and that
# train.py / prep.py write 'model.joblib' / 'scaler.joblib' — TODO confirm
model=os.path.join(model_reference.path_on_datastore,'model.joblib')
# `scaler_file` is the path string assigned when the scaler output was downloaded.
scaler=os.path.join(scaler_file,'scaler.joblib')

os.makedirs('model',exist_ok=True)
# shutil.copy is portable and raises on failure, unlike a fire-and-forget `cp`
# launched through os.popen.
shutil.copy(model,'model')
shutil.copy(scaler,'model')