# Create a pipeline from components with the Azure ML Python SDK v2

To create a pipeline, you can load components that are already registered in the workspace, or load components from a YAML file. This notebook does both to build the pipeline.

Load the registered data prep component.

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

ml_client = MLClient.from_config(credential=DefaultAzureCredential(), path="./")

data_prep = ml_client.components.get(
    name = "data_prep_diabetes",
    version = <>
)

Load the training component from a YAML file.

In [None]:
# Helper that builds a component object from a local definition file.
from azure.ai.ml import load_component

# Build the training component directly from its YAML file; unlike the data
# prep component, this one is not a registered component.
train_component = load_component(
    source="p04_training.yaml",
)

Create the pipeline.

In [None]:
from azure.ai.ml import dsl, Input, Output

# Pipeline definition: runs the data prep component and feeds its output into
# the training component.
# NOTE: `<>` below is a fill-in placeholder -- replace it with the compute
# to run the pipeline on (presumably the name of a compute target; confirm
# against your workspace) before running this cell.
@dsl.pipeline(
    compute=<>,
    description= "diabetes pipeline",
)
def diabetes_pipeline(
    pipeline_job_input_data,
):
    # pipeline_job_input_data is the single pipeline-level input; it is
    # forwarded unchanged to the data prep step as `input_data`.

    # Call the data prep component like a regular Python function, passing
    # its inputs as keyword arguments.
    # NOTE(review): the DSL may derive step names from these local variable
    # names -- confirm before renaming or removing them.
    data_prep_job = data_prep(
        input_data = pipeline_job_input_data
    )

    # Call the training component the same way; its `prepped_data` input is
    # wired to the `prepped_data` output of the data prep step.
    train_job = train_component(
        prepped_data = data_prep_job.outputs.prepped_data, # output of the previous step
    )

    # Return the data prep output under the key "pipeline_job_prepped_data"
    # (presumably this becomes a pipeline-level output -- confirm).
    return {
        "pipeline_job_prepped_data": data_prep_job.outputs.prepped_data,
    }


Now use the pipeline definition to instantiate a pipeline with your dataset. This example takes the file path from a registered data asset of type `uri_file`.

In [None]:
# Look up version "1" of the data asset registered under the name "diabetes".
diabetes_data = ml_client.data.get(name="diabetes", version="1")
print(f"data asset uri: {diabetes_data.path}")

# Instantiate the pipeline, passing the asset's path as a uri_file input.
# The path must be read from `diabetes_data`, the variable bound above;
# no variable named `diabetes` exists in this notebook.
pipeline = diabetes_pipeline(
    pipeline_job_input_data=Input(type="uri_file", path=diabetes_data.path),
)

Submit the job to the workspace.

In [None]:
# Submit the pipeline to the workspace. `experiment_name` is the project
# name the job is filed under.
pipeline_job = ml_client.jobs.create_or_update(
    pipeline,
    description="my description",
    experiment_name="diabetes_classification",
)

# Stream the submitted job, identified by its name.
ml_client.jobs.stream(pipeline_job.name)