In [None]:
import azureml.core
from azureml.core import Workspace, Datastore
import pandas as pd

# Load the workspace handle via Workspace.from_config().
ws = Workspace.from_config()
# The return value is discarded here; the call is kept from the original cell.
# NOTE(review): presumably retained to surface connection problems early — confirm.
ws.get_details()

# Default datastore of the workspace.
dstore = ws.get_default_datastore()

# One-row summary of the environment. The SDK version is recorded once; the
# original stored it under two keys that differed only in letter case
# ("SDK version" / "SDK Version"), which produced a duplicated row.
output = {
    "SDK version": azureml.core.VERSION,
    "Subscription ID": ws.subscription_id,
    "Workspace": ws.name,
    "Resource Group": ws.resource_group,
    "Location": ws.location,
    "Default datastore name": dstore.name,
}

# Do not truncate long cell values when rendering the table.
pd.set_option("display.max_colwidth", None)
outputDf = pd.DataFrame(data=output, index=[""])
# Transposed so each setting is shown as its own row.
outputDf.T

In [None]:
from azureml.core import Experiment, Dataset

# Experiment handle in workspace `ws`; it is passed to the inference step
# builder in a later cell.
experiment = Experiment(workspace=ws, name="AML-ADF-train")
print(f"Experiment name: {experiment.name}")

In [None]:
# Default datastore of the workspace; later cells use it both as the location
# of the inference CSV and as the destination for the uploaded output.
datastore = ws.get_default_datastore()
# Bare expression so the notebook shows the datastore as the cell's output.
datastore

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Comma-separated tabular dataset read from the sample test CSV on `datastore`;
# it is used as the inference input of the pipeline steps.
inference_csv_location = (datastore, "hts-sample/hts-sample-test.csv")
registered_inference = Dataset.Tabular.from_delimited_files(
    path=inference_csv_location,
    separator=",",
)

In [None]:
from azureml.pipeline.core import PipelineParameter
# Pipeline parameter named "train_run_id" whose default value is the string "none".
# NOTE(review): no later cell visible here references `train_run_id` — confirm
# whether it is meant to be passed to the inference step builder.
train_run_id = PipelineParameter(name="train_run_id", default_value="none")

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Name your cluster
compute_name = "cpu-cluster"

# Read the workspace's compute targets once and reuse the mapping for both the
# membership test and the lookup (the original evaluated the property twice).
existing_targets = ws.compute_targets

if compute_name in existing_targets:
    # Reuse the compute target that already exists under this name.
    compute_target = existing_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print("Found compute target: " + compute_name)
    else:
        # The original accepted a non-AmlCompute target without any message;
        # say so explicitly, but keep going so the cell still completes.
        print(
            "Warning: compute target '"
            + compute_name
            + "' exists but is not an AmlCompute cluster (found "
            + type(compute_target).__name__
            + ")"
        )
else:
    print("Creating a new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D16S_V3", max_nodes=20
    )
    # Create the compute target
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # Can poll for a minimum number of nodes and for a specific timeout.
    # If no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20
    )

    # For a more detailed view of current cluster status, use the 'status' property
    print(compute_target.status.serialize())

In [None]:
from azureml.data import OutputFileDatasetConfig
# Output configuration named "processed_data" that targets a folder on
# `datastore` and is switched to upload mode with overwrite enabled. The result
# is bound to `output`, which the step builder cell receives as `output_datastore`.
output_destination = (datastore, "hts-sample/divye-forecasting/")
processed_data_config = OutputFileDatasetConfig(
    name="processed_data",
    destination=output_destination,
)
output = processed_data_config.as_upload(overwrite=True)

In [None]:
from azureml.contrib.automl.pipeline.steps import AutoMLPipelineBuilder
from azureml.train.automl.runtime._hts.hts_parameters import HTSInferenceParameters

# The forecast level is specific to this dataset and should be changed based on
# your dataset.
forecast_level = "store_id"
allocation = "proportions_of_historical_average"

inference_parameters = HTSInferenceParameters(
    hierarchy_forecast_level=forecast_level,
    allocation_method=allocation,
)

# Parallelism requested from the step builder: nodes, and processes per node.
inference_node_count = 4
inference_processes_per_node = 8

# Build the batch inference steps from the experiment, dataset, compute target
# and output configuration defined in the earlier cells.
steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(
    experiment=experiment,
    inference_data=registered_inference,
    compute_target=compute_target,
    inference_pipeline_parameters=inference_parameters,
    node_count=inference_node_count,
    process_count_per_node=inference_processes_per_node,
    output_datastore=output,
)

In [None]:
from azureml.pipeline.core import Pipeline

# Assemble the pipeline from the inference steps and publish it under the name
# "AML-ADF-inference".
inference_pipeline = Pipeline(workspace=ws, steps=steps)
published_inference_pipeline = inference_pipeline.publish(name="AML-ADF-inference")
# Bare expression so the notebook shows the publish result as the cell's output.
published_inference_pipeline