# Titanic Challenge as Pipeline Job

## Connect to Workspace

In [None]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

try:
    credential = DefaultAzureCredential()
    # Request a management-plane token up front so an unusable credential
    # fails here rather than on the first workspace call.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Deliberate best-effort fallback: if the default credential cannot
    # produce a token for any reason, switch to an interactive browser login.
    credential = InteractiveBrowserCredential()

In [1]:
# Workspace client built via `MLClient.from_config`.
# Relies on `MLClient` and `credential` from the credential cell above, so that
# cell must be run first (otherwise this raises NameError on a fresh kernel).
ml_client = MLClient.from_config(
    credential=credential,
)

NameError: name 'MLClient' is not defined

## Load Components

In [1]:
from pathlib import Path

from azure.ai.ml import load_component

# Folder that holds the component YAML specs; "" means the current working directory.
parent_dir = ""
# Join with pathlib so a non-empty `parent_dir` gets a proper path separator
# (plain string concatenation would produce e.g. "components./prep-data.yml").
component_dir = Path(parent_dir)

prep_data = load_component(source=component_dir / "prep-data.yml")
train_random_forest = load_component(source=component_dir / "train-model.yml")
make_predictions = load_component(source=component_dir / "make-predictions.yml")

## Build Pipeline

In [12]:
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.dsl import pipeline

# Pipeline definition: cleans both datasets with the same `prep_data` component,
# trains a model on the cleaned training data, then scores the cleaned sample data.
# NOTE(review): in the printed job spec each step is keyed by the local variable
# it is assigned to (e.g. `clean_titanic_data`), so renaming these locals would
# presumably rename the pipeline steps -- confirm before refactoring.
@pipeline()
def titanic_classification(titanic_data, sample_data):
    # 1. Clean the training data (`titanic_data` is fed to the `unclean_data` input).
    clean_titanic_data = prep_data(unclean_data=titanic_data)
    # 2. Train the model on the cleaned training data.
    train_model = train_random_forest(training_data=clean_titanic_data.outputs.clean_data)
    # 3. Clean the sample (submission) data with the same prep component.
    clean_sample_data = prep_data(unclean_data=sample_data)
    # 4. Make predictions on the cleaned sample data using the trained model.
    get_predictions = make_predictions(sample_data=clean_sample_data.outputs.clean_data,
                                       trained_model=train_model.outputs.trained_model)

    # The dict keys are the pipeline-level output names (they appear under
    # `outputs:` in the printed job spec); each maps to one step output.
    return {
        "pipeline_job_transformed_data": clean_titanic_data.outputs.clean_data,
        "pipeline_job_trained_model": train_model.outputs.trained_model,
        "pipeline_job_prediction_data": get_predictions.outputs.predictions_data,
    }

# Instantiate the pipeline, binding each input explicitly by parameter name.
pipeline_job = titanic_classification(
    titanic_data=Input(type=AssetTypes.URI_FILE, path="azureml:titanic-local:1"),
    sample_data=Input(type=AssetTypes.URI_FILE, path="azureml:titanic-sample-local:1"),
)

## Configure Pipeline Settings

In [13]:
# Pipeline-level defaults, applied through the job's settings object.
# Default datastore for the pipeline:
pipeline_job.settings.default_datastore = "workspaceblobstore"
# Default compute target for the pipeline:
pipeline_job.settings.default_compute = "aml-cluster"

# Print the job definition to review the settings applied above.
print(pipeline_job)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


display_name: titanic_classification
type: pipeline
inputs:
  titanic_data:
    type: uri_file
    path: azureml:titanic-local:1
  sample_data:
    type: uri_file
    path: azureml:titanic-sample-local:1
outputs:
  pipeline_job_transformed_data:
    type: uri_folder
  pipeline_job_trained_model:
    type: mlflow_model
  pipeline_job_prediction_data:
    type: uri_folder
jobs:
  clean_titanic_data:
    type: command
    inputs:
      unclean_data:
        path: ${{parent.inputs.titanic_data}}
    outputs:
      clean_data: ${{parent.outputs.pipeline_job_transformed_data}}
    component:
      $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
      name: prep_data
      version: '1'
      display_name: Prepare data for training/predictions
      type: command
      inputs:
        unclean_data:
          type: uri_file
      outputs:
        clean_data:
          type: uri_folder
      command: python prep-data.py  --unclean_data ${{inputs.unclean_data}} -

## Submit Pipeline Job

In [14]:
# Submit the pipeline definition to the workspace under the "pipeline_titanic" experiment.
# The returned job is bound to its own name so that `pipeline_job` keeps referring
# to the locally built definition instead of being overwritten by the submission
# result; re-running this cell then always submits the same definition.
submitted_job = ml_client.jobs.create_or_update(
    pipeline_job, experiment_name="pipeline_titanic"
)
# Display the submitted job summary (experiment, name, type, status, studio link).
submitted_job

[32mUploading src (0.01 MBs): 100%|██████████| 7528/7528 [00:00<00:00, 233857.37it/s]
[39m

pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.MLFlowModelJobOutput'> and will be ignored
pathOnCompute is not a known attribute of class <class 'azure.ai.ml._restclient.v2023_04_01_preview.models._models_py3.UriFolderJobOutput'> and will be ignored


Experiment,Name,Type,Status,Details Page
pipeline_titanic,patient_endive_fntg4zntxf,pipeline,NotStarted,Link to Azure Machine Learning studio


---
# End of Notebook