# Titanic Challenge - Pipeline Job

## Connect to Workspace

In [None]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

try:
    credential = DefaultAzureCredential()
    # Request a management-plane token right away so that an unusable
    # default credential fails here instead of on the first workspace call.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Deliberately broad best-effort fallback: if DefaultAzureCredential
    # cannot be created or cannot obtain a token for any reason, switch to
    # InteractiveBrowserCredential instead.
    credential = InteractiveBrowserCredential()

In [None]:
# Get a handle to the workspace, authenticating with the credential chosen above.
# NOTE(review): from_config presumably resolves the workspace from a local
# config file -- confirm that file is available where this notebook runs.
ml_client = MLClient.from_config(credential=credential)

## Load Components

In [None]:
from pathlib import Path

from azure.ai.ml import load_component

# Directory that holds the component YAML specs; "" means the current
# working directory.
parent_dir = ""
component_dir = Path(parent_dir)

# Build each path with pathlib instead of string concatenation so that
# parent_dir works with or without a trailing path separator.
prep_data = load_component(source=component_dir / "prep-data.yml")
train_random_forest = load_component(source=component_dir / "train-model.yml")
make_predictions = load_component(source=component_dir / "make-predictions.yml")

## Build Pipeline

In [None]:
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.dsl import pipeline

# Pipeline definition: cleans the training data with prep_data, trains a
# model on the cleaned data with train_random_forest, cleans the sample data
# with the same prep_data component, and feeds the cleaned sample data plus
# the trained model into make_predictions.
#
# Inputs:
#   titanic_data - passed to prep_data as the unclean training data.
#   sample_data  - passed to prep_data as the unclean data to predict on.
# Returns a dict that maps pipeline-level output names to the step outputs.
#
# NOTE(review): the azure-ai-ml DSL presumably derives node names from the
# local variable names below and may read a docstring or annotations on this
# function as pipeline metadata -- confirm before renaming locals or adding
# either.
@pipeline()
def titanic_classification(titanic_data, sample_data):
    # 1 clean training data
    clean_titanic_data = prep_data(unclean_data=titanic_data)
    # 2 train model on the cleaned training data
    train_model = train_random_forest(training_data=clean_titanic_data.outputs.clean_data)
    # 3 clean submission (sample) data with the same prep component
    clean_sample_data = prep_data(unclean_data=sample_data)
    # 4 make predictions on the cleaned sample data using the trained model
    get_predictions = make_predictions(sample_data=clean_sample_data.outputs.clean_data,
                                       trained_model=train_model.outputs.trained_model)

    # Expose the cleaned training data, the trained model and the
    # predictions as outputs of the pipeline job itself.
    return {
        "pipeline_job_transformed_data": clean_titanic_data.outputs.clean_data,
        "pipeline_job_trained_model": train_model.outputs.trained_model,
        "pipeline_job_prediction_data": get_predictions.outputs.predictions_data,
    }

# Instantiate the pipeline, binding each registered data asset to its input
# by name: version 1 of "titanic-local" is the training data and version 1 of
# "titanic-sample-local" is the data to predict on.
pipeline_job = titanic_classification(
    titanic_data=Input(
        type=AssetTypes.URI_FILE,
        path="azureml:titanic-local:1",
    ),
    sample_data=Input(
        type=AssetTypes.URI_FILE,
        path="azureml:titanic-sample-local:1",
    ),
)

## Change Pipeline Parameters

In [None]:
# Pipeline-level defaults: the datastore first, then the compute target.
pipeline_job.settings.default_datastore = "workspaceblobstore"
pipeline_job.settings.default_compute = "aml-cluster"

# Print the configured pipeline job so the updated settings can be reviewed.
print(pipeline_job)

## Submit Pipeline Job

In [None]:
# Submit the pipeline job to the workspace under the "pipeline_titanic"
# experiment; the name is rebound to the job object that the call returns.
pipeline_job = ml_client.jobs.create_or_update(
    job=pipeline_job,
    experiment_name="pipeline_titanic",
)
# Bare expression on the last line of the cell so the notebook displays the job.
pipeline_job

---
# End of Notebook