In [None]:
import azureml.core
from azureml.core import Workspace, Experiment, Dataset, RunConfiguration
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.environment import CondaDependencies
from azureml.data.dataset_consumption_config import DatasetConsumptionConfig
from azureml.widgets import RunDetails

from azureml.pipeline.core import PipelineParameter
from azureml.pipeline.core import Pipeline, PipelineRun
from azureml.pipeline.steps import PythonScriptStep

# Report which azureml-core release this kernel is running against.
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

In [None]:
# Connect to the workspace via Workspace.from_config(), then echo its
# identifying details, one per line.
ws = Workspace.from_config()
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)

In [None]:
# Push the local 'datafolder' directory to the 'data' path of the
# workspace's default datastore.
datastore = ws.get_default_datastore()

# overwrite=True: upload() otherwise skips files that already exist at the
# target, so re-running this cell after editing a local file would silently
# leave the old copy in the datastore.
datastore.upload(src_dir='datafolder', target_path='data', overwrite=True)

In [None]:
# Build a tabular dataset from the CSV that lives in the datastore.
dataset = Dataset.Tabular.from_delimited_files(
    path=datastore.path('data/busan_test_scaled_dataset.csv')
)

# Register it under a fixed name so later cells can fetch it with
# Dataset.get_by_name(). create_new_version=True keeps this cell re-runnable:
# without it, registering again under an already-used name is rejected by the
# service instead of producing a new version.
weather_ds = dataset.register(
    workspace=ws,
    name='tab_pvprediction_train',
    description='busan test data',
    create_new_version=True,
)

In [None]:
# Folder handed to the pipeline step as its source directory.
source_directory = "."

# Name of the run history container (experiment) in the workspace.
experiment_name = "busan-dataset"
experiment = Experiment(workspace=ws, name=experiment_name)

# Bare expression so the notebook renders the experiment's rich summary.
experiment

In [None]:
# Name of the AmlCompute cluster to reuse or create.
amlcompute_cluster_name = "cpu-cluster"

# Look the name up among the workspace's compute targets; it only counts as
# found when a target with that name exists AND is an AmlCompute cluster.
cts = ws.compute_targets
existing_target = cts.get(amlcompute_cluster_name)
found = existing_target is not None and existing_target.type == "AmlCompute"

if found:
    print("Found existing compute target.")
    compute_target = existing_target
else:
    print("Creating a new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",  # for GPU, use "STANDARD_NC6"
        # vm_priority = 'lowpriority', # optional
        max_nodes=4,
    )

    # Ask the workspace to provision the cluster.
    compute_target = ComputeTarget.create(
        ws, amlcompute_cluster_name, provisioning_config
    )

    # Wait (up to 10 minutes) for provisioning to finish. No min_node_count
    # is given, so the cluster's own scale settings apply.
    compute_target.wait_for_completion(show_output=True, timeout_in_minutes=10)

    # Tip: compute_target.get_status() gives a more detailed view of the
    # current AmlCompute status.

In [None]:
# Pip packages the step script needs on top of the default conda dependencies.
conda_dep = CondaDependencies()
for pip_package in ("pandas",):
    conda_dep.add_pip_package(pip_package)

# Run configuration carrying those dependencies; passed to the pipeline step.
run_config = RunConfiguration(conda_dependencies=conda_dep)

In [None]:
# Fetch the registered dataset by name.
tabular_dataset = Dataset.get_by_name(workspace=ws, name='tab_pvprediction_train')

# Expose the dataset as a pipeline parameter; the registered dataset is the
# default value used when a submission does not override it.
tabular_pipeline_param = PipelineParameter(
    name="tabular_ds_param",
    default_value=tabular_dataset,
)

# "tabular_dataset" is the key under which the step script finds this input
# in Run.input_datasets.
tabular_ds_consumption = DatasetConsumptionConfig(
    name="tabular_dataset",
    dataset=tabular_pipeline_param,
)

In [None]:
%%writefile train.py
from azureml.core import Run

input_tabular_ds = Run.get_context().input_datasets["tabular_dataset"]
tabular_df = input_tabular_ds.to_pandas_dataframe()
print(tabular_df)


In [None]:
# Command-line arguments for train.py; the dataset consumption config is
# passed both as an argument and as a declared input of the step.
step_arguments = ["--param1", tabular_ds_consumption]

train_step = PythonScriptStep(
    name="train_step",
    source_directory=source_directory,
    script_name="train.py",
    arguments=step_arguments,
    inputs=[tabular_ds_consumption],
    runconfig=run_config,
    compute_target=compute_target,
)
print("train_step created")

# Single-step pipeline wrapping the training step.
pipeline = Pipeline(workspace=ws, steps=[train_step])
print("pipeline with the train_step created")

In [None]:
# Submit the pipeline to the experiment. No pipeline parameters are passed
# here, so "tabular_ds_param" falls back to its default value (the registered
# tabular dataset).
pipeline_run = experiment.submit(pipeline)
print("Pipeline is submitted for execution")

In [None]:
# Show the notebook widget for monitoring the submitted pipeline run.
RunDetails(pipeline_run).show()

In [None]:
# Block this cell until the pipeline run has finished.
pipeline_run.wait_for_completion()