In [1]:
import pandas as pd
from azureml.core import Workspace, Experiment, Environment
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import AutoMLStep
from azureml.core import Dataset,Datastore

In [2]:
def getAMLWorkspace(name="your_workspace_name", subscription_id="your_subscription_id", resource_group="your_resource_group"):
    """Return a handle to the Azure Machine Learning workspace.

    The workspace is first loaded with ``Workspace.from_config()``. If that
    call raises, the reason is printed and the workspace is looked up with
    ``Workspace.get`` using the explicit identifiers passed in.

    Args:
        name: Workspace name used for the fallback lookup.
        subscription_id: Subscription id used for the fallback lookup.
        resource_group: Resource group used for the fallback lookup.

    Returns:
        The object returned by ``Workspace.from_config()`` or, on fallback,
        by ``Workspace.get``.

    Raises:
        Any exception raised by ``Workspace.get`` when the fallback lookup
        also fails.
    """
    try:
        return Workspace.from_config()
    except Exception as ex:
        # Report why the config-based lookup failed instead of hiding it, so a
        # broken or missing config is visible before the fallback runs.
        print(f"Workspace.from_config() failed ({ex!r}); falling back to Workspace.get().")
        return Workspace.get(name=name,
                             subscription_id=subscription_id,
                             resource_group=resource_group)

In [3]:
# Connect to the Azure Machine Learning workspace
workspace = getAMLWorkspace()

# Experiment under which the pipeline run is submitted
experiment = Experiment(workspace=workspace, name="timeseries_automl_experiment")

# Look up the environment named "AzureML-AutoML" in the workspace
environment = Environment.get(workspace=workspace, name="AzureML-AutoML")

# Datastore that holds the input CSV and receives the preprocessed CSV
datastore = Datastore.get(workspace=workspace, datastore_name="inputdata")

## Data Preprocessing

In [6]:
# Load the raw time series from the datastore
dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, "timeSeriesSampleData.csv")])

# Build the base frame without in-place mutation, so re-running the cell
# always starts from the freshly loaded data:
#   1. drop rows that have no timestamp,
#   2. index the rows by their timestamp,
#   3. keep only the two wind-speed inputs and the Power target.
base_df = dataset.to_pandas_dataframe().dropna(subset=["DateTime"])
base_df = base_df.set_index(pd.DatetimeIndex(base_df["DateTime"]))
base_df = base_df[["hist_WindSpeed10m", "solcast_forecast_windSpeed", "Power"]]

# Rows before 2023-07-01 form the training frame; later rows stay in base_df
# and are used for inference further down.
df = base_df[base_df.index < "2023-07-01 00:00:00"]
df.describe()



Unnamed: 0,hist_WindSpeed10m,solcast_forecast_windSpeed,Power
count,7200.0,7200.0,7200.0
mean,4.480153,4.448831,40.911204
std,2.350875,2.218417,31.674646
min,0.1,0.7,0.0
25%,2.6,2.72,11.391667
50%,3.9,4.02,36.370833
75%,6.1,5.79,69.016667
max,12.0,12.43,99.891667


In [7]:
# Preview the first rows of the training frame
df.head()

Unnamed: 0_level_0,hist_WindSpeed10m,solcast_forecast_windSpeed,Power
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-06 00:00:00,3.1,3.36,23.758333
2021-01-06 01:00:00,3.0,3.35,24.3
2021-01-06 02:00:00,2.9,3.37,15.766667
2021-01-06 03:00:00,2.8,3.4,7.791667
2021-01-06 04:00:00,3.2,3.54,7.358333


In [8]:
# Count missing values per column of the training frame
df.isna().sum()

hist_WindSpeed10m             0
solcast_forecast_windSpeed    0
Power                         0
dtype: int64

In [9]:
# Write the training frame to a local CSV file
df.to_csv("preprocessed.csv")
# Upload the local CSV to the datastore, overwriting any previous upload.
# NOTE(review): this call prints a deprecation notice that recommends
# FileDatasetFactory.upload_directory; consider migrating.
datastore.upload_files(files = ["./preprocessed.csv"],
                                target_path = "preprocessed.csv",
                                overwrite = True,
                                show_progress = True)
# Tabular dataset over the uploaded path; this is what AutoML trains on
training_dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, "preprocessed.csv")])

"datastore.upload_files" is deprecated after version 1.0.69. Please use "FileDatasetFactory.upload_directory" instead. See Dataset API change notice at https://aka.ms/dataset-deprecation.


Uploading an estimated of 1 files
Uploading ./preprocessed.csv
Uploaded ./preprocessed.csv, 1 files out of an estimated total of 1
Uploaded 1 files


## Compute Cluster Initialization

In [10]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster to reuse or create
amlcompute_cluster_name = "democlustereastus"

# Reuse the cluster when it already exists; otherwise provision a new one
try:
    compute_target = ComputeTarget(workspace=workspace, name=amlcompute_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_DS12_V2",
        max_nodes=6,
    )
    compute_target = ComputeTarget.create(
        workspace=workspace,
        name=amlcompute_cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print("Found existing cluster, use it.")

# Wait until the cluster reports completion, streaming its status output
compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Setup Experiment configurations

In [12]:
from azureml.automl.core.forecasting_parameters import ForecastingParameters

# Number of periods to forecast; with freq="H" below this is 24 hourly steps
forecast_horizon = 24
forecasting_parameters = ForecastingParameters(
    time_column_name="DateTime",
    forecast_horizon=forecast_horizon,
    freq="H",  # Set the forecast frequency to be hourly
    cv_step_size="auto",
)

In [13]:
# AutoML forecasting settings, gathered in one mapping and unpacked into the config
automl_settings = {
    "task": "forecasting",
    "primary_metric": "normalized_root_mean_squared_error",
    "experiment_timeout_minutes": 30,
    "training_data": training_dataset,  # tabular dataset built from preprocessed.csv
    "label_column_name": "Power",
    "compute_target": compute_target,  # cluster set up in the compute section
    "enable_early_stopping": True,
    "n_cross_validations": "auto",
    "forecasting_parameters": forecasting_parameters,
}
automl_config = AutoMLConfig(**automl_settings)


## Create AutoML step

In [14]:
# Wrap the AutoML configuration in a pipeline step
automl_step = AutoMLStep(
    name="automl_module",
    automl_config=automl_config,
    outputs=[],
    allow_reuse=True,
)

## Create and submit pipeline

In [15]:
# Define the pipeline; it consists of the single AutoML step
pipeline = Pipeline(workspace=workspace, steps=[automl_step])

# Submit the pipeline run under the experiment.
# NOTE(review): the blocking wait below is commented out, so nothing in this
# cell waits for the run to finish before later cells execute.
pipeline_run = experiment.submit(pipeline)
# pipeline_run.wait_for_completion(show_output=True)

Created step automl_module [f7fa8272][8208f47b-9ec0-413b-8404-0e711f9521de], (This step will run and generate new outputs)
Submitted PipelineRun 9d92982a-e14d-4f87-9b40-a9e4b64851c1
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9d92982a-e14d-4f87-9b40-a9e4b64851c1?wsid=/subscriptions/543aa660-8160-4b15-bee0-60ad9536dae6/resourcegroups/demoaml/workspaces/demoaml&tid=cac48661-42c6-4b49-9bc5-7a90afc64f04


## Inference

In [16]:
# Prepare inference data: the rows of base_df that fall on 2023-07-01
# (start inclusive, end exclusive).
inference_mask = (base_df.index >= "2023-07-01 00:00:00") & (base_df.index < "2023-07-02 00:00:00")

# .loc plus .assign builds a new frame, so the DateTime column is added without
# writing into a slice of base_df (which triggers SettingWithCopyWarning).
# The new column is appended last, holding each row's timestamp.
inference_data = base_df.loc[inference_mask].assign(DateTime=lambda frame: frame.index)

In [17]:
from azureml.train.automl.run import AutoMLRun
# NOTE(review): this run id is hard-coded; it differs from the pipeline run id
# printed when the pipeline was submitted, so update it to the AutoML run whose
# model should be loaded.
AUTOML_RUN_ID = "c12732a1-4163-4261-8c97-0d402036261f"

# Reuse the `experiment` handle created during setup instead of looking the
# experiment up again by name through workspace.experiments.
automl_run = AutoMLRun(experiment=experiment, run_id=AUTOML_RUN_ID)

In [20]:
# get_output() returns a pair: the first element is bound to `all_models`, the
# second is the fitted model used for forecasting.
# NOTE(review): `all_models` renders as a single completed run when displayed,
# so it looks like the best run rather than a collection of models. Confirm
# and consider renaming.
all_models,fitted_model = automl_run.get_output()

Package:azureml-automl-runtime, training version:1.52.0.post1, current version:1.51.0.post1
Package:azureml-core, training version:1.52.0, current version:1.51.0
Package:azureml-dataprep, training version:4.11.4, current version:4.10.8
Package:azureml-dataprep-rslex, training version:2.18.4, current version:2.17.12
Package:azureml-dataset-runtime, training version:1.52.0, current version:1.51.0
Package:azureml-defaults, training version:1.52.0, current version:1.51.0
Package:azureml-interpret, training version:1.52.0, current version:1.51.0
Package:azureml-mlflow, training version:1.52.0, current version:1.51.0
Package:azureml-pipeline-core, training version:1.52.0, current version:1.51.0
Package:azureml-responsibleai, training version:1.52.0, current version:1.51.0
Package:azureml-telemetry, training version:1.52.0, current version:1.51.0
Package:azureml-train-automl-client, training version:1.52.0, current version:1.51.0.post1
Package:azureml-train-automl-runtime, training version:1.

In [21]:
# Display the first element returned by get_output()
all_models

Experiment,Id,Type,Status,Details Page,Docs Page
timeseries_automl_experiment,c12732a1-4163-4261-8c97-0d402036261f_8,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [18]:
# Fetch the fitted model from the AutoML run; the first returned element is discarded.
# NOTE(review): get_output() is also called in another cell, so this repeats
# that call. Reusing the existing `fitted_model` may be enough; confirm.
_,fitted_model = automl_run.get_output()

# Perform forecasts on the prepared inference frame; forecast() returns a pair,
# bound here to y_pred and xy
y_pred, xy = fitted_model.forecast(inference_data)

Package:azureml-automl-runtime, training version:1.52.0.post1, current version:1.51.0.post1
Package:azureml-core, training version:1.52.0, current version:1.51.0
Package:azureml-dataprep, training version:4.11.4, current version:4.10.8
Package:azureml-dataprep-rslex, training version:2.18.4, current version:2.17.12
Package:azureml-dataset-runtime, training version:1.52.0, current version:1.51.0
Package:azureml-defaults, training version:1.52.0, current version:1.51.0
Package:azureml-interpret, training version:1.52.0, current version:1.51.0
Package:azureml-mlflow, training version:1.52.0, current version:1.51.0
Package:azureml-pipeline-core, training version:1.52.0, current version:1.51.0
Package:azureml-responsibleai, training version:1.52.0, current version:1.51.0
Package:azureml-telemetry, training version:1.52.0, current version:1.51.0
Package:azureml-train-automl-client, training version:1.52.0, current version:1.51.0.post1
Package:azureml-train-automl-runtime, training version:1.

In [19]:
# Number of entries in y_pred
len(y_pred)

24

In [70]:
# Show the two wind-speed inputs next to the "_automl_target_col" column of xy
xy[["hist_WindSpeed10m","solcast_forecast_windSpeed","_automl_target_col"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,hist_WindSpeed10m,solcast_forecast_windSpeed,_automl_target_col
DateTime,_automl_dummy_grain_col,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-07-01 00:00:00,_automl_dummy_grain_col,2.8,2.93,6.56
2023-07-01 01:00:00,_automl_dummy_grain_col,3.2,2.67,0.22
2023-07-01 02:00:00,_automl_dummy_grain_col,3.6,2.43,-2.61
2023-07-01 03:00:00,_automl_dummy_grain_col,3.5,2.24,-0.26
2023-07-01 04:00:00,_automl_dummy_grain_col,3.1,2.27,2.52
2023-07-01 05:00:00,_automl_dummy_grain_col,2.8,2.54,12.71
2023-07-01 06:00:00,_automl_dummy_grain_col,2.5,2.82,14.08
2023-07-01 07:00:00,_automl_dummy_grain_col,2.4,3.02,19.91
2023-07-01 08:00:00,_automl_dummy_grain_col,2.2,3.2,43.85
2023-07-01 09:00:00,_automl_dummy_grain_col,2.1,3.32,38.92


In [None]:
# Hello, below are the changes suggested by Vishnu.
# Please apply them as needed.