# Connect to workspace

In [None]:
import os
import azureml.core
from azureml.core import Workspace, Experiment, Dataset, RunConfiguration
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.data.dataset_consumption_config import DatasetConsumptionConfig

# Report which Azure ML SDK build this notebook is running against.
sdk_version = azureml.core.VERSION
print("Azure ML SDK version:", sdk_version)

# Obtain the workspace handle via Workspace.from_config() and echo it.
# NOTE(review): presumably this reads a local workspace config file — confirm
# one is present where the notebook is run.
ws = Workspace.from_config()
print(ws)

# Create Pipeline

In [None]:
# Look up the dataset registered in the workspace as "german_credit_file".
dataset = Dataset.get_by_name(workspace=ws, name="german_credit_file")

# Wrap it in a consumption config named "training_dataset", then switch that
# config to download mode via as_download().
consumption_config = DatasetConsumptionConfig(name="training_dataset", dataset=dataset)
dataset_consumption = consumption_config.as_download()

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Environment for the training step: Docker enabled, conda dependencies taken
# from ../azureml/conda.yml.
train_env = Environment(name="train-env")
# NOTE(review): newer azureml-core releases may report `docker.enabled` as
# deprecated — confirm against the SDK version this notebook is pinned to.
train_env.docker.enabled = True
train_env.python.conda_dependencies = CondaDependencies(
    conda_dependencies_file_path="../azureml/conda.yml"
)

# Run configuration: target 'cpu-cluster' and use the environment built above.
runconfig = RunConfiguration()
runconfig.target = 'cpu-cluster'
runconfig.environment = train_env

# The only pipeline step: runs train.py from the parent directory and hands it
# the dataset consumption config through the --data_path argument.
# allow_reuse=False is passed explicitly to the step.
train_step = PythonScriptStep(
    name="train-step",
    script_name="train.py",
    source_directory="../",
    arguments=['--data_path', dataset_consumption],
    inputs=[dataset_consumption],
    runconfig=runconfig,
    allow_reuse=False,
)

steps = [train_step]
pipeline = Pipeline(workspace=ws, steps=steps)

# Kept as the cell's final bare expression so the notebook displays whatever
# validate() returns.
pipeline.validate()

In [None]:
# Submit the pipeline under the 'german-credit-training' experiment.
experiment = Experiment(workspace=ws, name='german-credit-training')
pipeline_run = experiment.submit(pipeline)

# Wait for the run; as the cell's last expression, the call's result is
# displayed by the notebook.
pipeline_run.wait_for_completion()

Publish the pipeline to the Azure ML workspace so that it can be reused:

In [None]:
# Publish the pipeline under the name 'german-credit-training'.
published_pipeline = pipeline.publish(name='german-credit-training')

# Bare expression so the notebook renders the published pipeline object.
published_pipeline
