## Python Wrapper to Trigger an Azure ML Pipeline with R Steps

### Setup and get the Dataset

In [None]:
import azureml.core
from azureml.core import Workspace, Datastore, Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core import Environment 
from azureml.data import OutputFileDatasetConfig

# Get the default Workspace. from_config() is called with no arguments, so
# whatever configuration lookup and authentication the SDK defaults to is used.
ws = Workspace.from_config()

In [None]:
from azureml.core import Dataset

# Build a file dataset from the 'penguin_data' path on the workspace's default datastore
datastore = ws.get_default_datastore()
penguin_data_path = datastore.path('penguin_data')
pg_dataset = Dataset.File.from_files(penguin_data_path)

# Bare expression so the notebook cell displays the dataset object
pg_dataset

### Get the Compute and Custom Env

In [None]:
# Look up the existing compute target by name among the workspace's compute targets
compute_name = "avisekCompute"
workspace_targets = ws.compute_targets
compute_target = workspace_targets[compute_name]

# Fetch the custom environment by name from the workspace
env = Environment.get(workspace=ws, name='commandstepR-env')

### Define and Trigger the Pipeline

In [None]:
from azureml.core import ScriptRunConfig
from azureml.pipeline.core import PipelineData

def _uploaded_output(name):
    """Return an upload-mode output config called *name* with destination ``(datastore, name)``."""
    config = OutputFileDatasetConfig(name=name, destination=(datastore, name))
    return config.as_upload(overwrite=True)


# NOTE(review): penguin_data is not referenced by any step visible in this notebook.
penguin_data = PipelineData("penguin_data", datastore=datastore)

# Outputs of the R steps; each one is uploaded with overwrite enabled
validated_data = _uploaded_output("validated_data")
train_data = _uploaded_output("train_data")
test_data = _uploaded_output("test_data")
model = _uploaded_output("model")

# Directory holding the R scripts that are submitted with each run
src_dir = './'


def _rscript_run_config(command):
    """Return a ScriptRunConfig for *command* using the shared source dir, compute target and environment."""
    return ScriptRunConfig(
        source_directory=src_dir,
        command=command,
        compute_target=compute_target,
        environment=env,
    )


# Run configurations, one per R script

# process_data.R: reads the mounted penguin dataset, writes validated_data
process_data = _rscript_run_config([
    'Rscript process_data.R --penguin_data',
    pg_dataset.as_named_input(name="penguin_data").as_mount(),
    '--output_folder',
    validated_data,
])

# prepare_data.R: receives validated_data plus the train/test output folders
prepare_data = _rscript_run_config([
    'Rscript prepare_data.R --validated_data',
    validated_data,
    '--train_folder',
    train_data,
    '--test_folder',
    test_data,
])

# train_model_dt.R: receives train_data and the model folder
train = _rscript_run_config([
    'Rscript train_model_dt.R --train_data',
    train_data,
    '--model_folder',
    model,
])

# test_model_dt.R: receives test_data and the model folder
test = _rscript_run_config([
    'Rscript test_model_dt.R --test_data',
    test_data,
    '--model_folder',
    model,
])

In [None]:
from azureml.pipeline.steps import CommandStep

# Pipeline steps: each CommandStep wraps one of the run configurations and
# declares the data it consumes and produces.
# NOTE(review): the same OutputFileDatasetConfig objects are listed as outputs of
# one step and as inputs of the next; the SDK also offers `.as_input()` for
# consuming such outputs downstream - confirm this wiring behaves as intended.

# Process Data step
process_data_step = CommandStep(
    name='process_data',
    runconfig=process_data,
    outputs=[validated_data],
)

# Prepare / feature-engineering step
prepare_data_step = CommandStep(
    name='prepare_data',
    runconfig=prepare_data,
    inputs=[validated_data],
    outputs=[train_data, test_data],
)

# Train the model
train_step = CommandStep(
    name='model_training',
    runconfig=train,
    inputs=[train_data],
    outputs=[model],
)

# Test (score) the model; this step declares no outputs
test_step = CommandStep(
    name='model_scoring',
    runconfig=test,
    inputs=[test_data, model],
)


In [None]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

# Steps handed to the pipeline. Only the final scoring step is listed here;
# presumably the SDK pulls in the upstream steps through their data
# dependencies - confirm against the rendered pipeline graph.
poc_pipeline_R = [test_step]

# Build the pipeline. poc_pipeline_R is already a list of steps, so it is
# passed directly instead of being wrapped in a second list.
pipeline1 = Pipeline(workspace=ws, steps=poc_pipeline_R)

# Submit the pipeline under the experiment name and block until the run finishes
pipeline_run1 = Experiment(ws, 'POC_PENGUIN_DATA_CMDSTEP').submit(pipeline1)
pipeline_run1.wait_for_completion()

### Register the Model

In [None]:
from azureml.core.model import Model
import os

# Current working directory: download target and base of the local model path
wkDir = os.getcwd()
# Path of the model file, relative to both the datastore root and wkDir
dataDir = "model/model_dt.rds"

# Download the model file from the datastore into the working directory
datastore.download(wkDir, prefix=dataDir, overwrite=True)

# Local path of the downloaded model file
path = os.path.join(wkDir, dataDir)

# Register the downloaded file as a model in the workspace
myModel = Model.register(
    workspace=ws,
    model_path=path,
    model_name="decision_tree_model",
    description="Decision Tree model to predict Penguin Species",
    tags={'area': "penguin data", 'type': "classification"},
)

print('Name:', myModel.name)
print('Version:', myModel.version)