# Updated version of the project

### The AzureML SDK version


In [None]:
import os
import random
from azure.identity import AzureCliCredential
from azure.mgmt.resource import ResourceManagementClient
from azure.mgmt.storage import StorageManagementClient
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient
from azure.ai.ml.dsl import pipeline
from azure.ai.ml import load_component
from azure.ai.ml.entities import AmlCompute, Environment, Model
from azure.ai.ml import command, Input
from azure.ai.ml.sweep import Choice, Normal, Uniform, RandomParameterSampling

# Build a CLI-based credential object.
# NOTE(review): this `credential` is not the object handed to MLClient below,
# which constructs its own DefaultAzureCredential -- confirm which is intended.
credential = AzureCliCredential()

# Placeholders identifying the target Azure Machine Learning workspace;
# replace them with real values before running.
subscription_id = "<SUBSCRIPTION_ID>"
resource_group = "<RESOURCE_GROUP>"
workspace = "<AZUREML_WORKSPACE_NAME>"

# Client handle used for the workspace operations in the rest of this section.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

# Name of the compute cluster that training jobs are submitted to.
cpu_compute_target = "cpu-cluster"

# Look the cluster up; if the lookup raises for any reason, provision a new one.
# NOTE(review): `except Exception` also catches failures other than
# "cluster not found" (e.g. auth or network errors) and then attempts creation.
try:
    ml_client.compute.get(cpu_compute_target)
except Exception:
    print("Creating a new cpu compute target...")
    compute = AmlCompute(
        name=cpu_compute_target,
        size="STANDARD_D2_V2",
        min_instances=0,
        max_instances=4,
    )
    # Block until the provisioning operation has finished.
    provisioning = ml_client.compute.begin_create_or_update(compute)
    provisioning.result()

# Describe the training environment: a base Docker image plus a conda
# specification layered on top of it.
# NOTE(review): `dependencies_dir` is not defined anywhere in the visible file;
# it must be set (to the folder holding conda.yaml) before this runs.
custom_env_name = "sklearn-env"
job_env = Environment(
    name=custom_env_name,
    description="Custom environment for sklearn image classification",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file=os.path.join(dependencies_dir, "conda.yaml"),
)

# Register the environment; the returned object replaces the local definition
# and carries the version that is reported below.
job_env = ml_client.environments.create_or_update(job_env)

print(
    f"Environment with name {job_env.name} is registered to the workspace, "
    f"the environment version is {job_env.version}"
)

# Command job that runs train_iris.py from ./src/ on the CPU cluster.
# The `${{inputs.*}}` placeholders in the command line refer to the entries
# of `inputs`, which here hold fixed default values.
job = command(
    code="./src/",
    command="python train_iris.py --kernel ${{inputs.kernel}} --penalty ${{inputs.penalty}}",
    inputs={"kernel": "linear", "penalty": 1.0},
    environment=f"{job_env.name}:{job_env.version}",
    compute=cpu_compute_target,
    display_name="sklearn-classify-iris-flower-images",
    experiment_name="sklearn-iris-flowers",
)

# Derive a sweepable job from `job` by replacing each fixed input with a
# discrete search space.
job_for_sweep = job(
    kernel=Choice(values=["linear", "rbf", "poly", "sigmoid"]),
    penalty=Choice(values=[0.5, 1, 1.5]),
)

# Configure the sweep: random sampling over the space above, maximizing the
# "Accuracy" metric (presumably logged by the training script -- confirm).
# The search space has 4 x 3 = 12 combinations, the same number as
# max_total_trials; at most 4 trials run at once.
# The compute target reuses `cpu_compute_target` instead of repeating the
# cluster name, so the sweep cannot drift from the cluster provisioned earlier.
sweep_job = job_for_sweep.sweep(
    compute=cpu_compute_target,
    sampling_algorithm="random",
    primary_metric="Accuracy",
    goal="Maximize",
    max_total_trials=12,
    max_concurrent_trials=4,
)

# Submit the sweep to the workspace.
returned_sweep_job = ml_client.create_or_update(sweep_job)

# Stream the sweep's logs for the submitted job.
ml_client.jobs.stream(returned_sweep_job.name)

# Re-fetch the sweep job so that its status and properties reflect the state
# after streaming has ended.
returned_sweep_job = ml_client.jobs.get(name=returned_sweep_job.name)

# Only a completed sweep has a best child run to build a model from, so both
# the model definition and its registration live in the success branch.
# (Registering unconditionally would raise NameError on `model` whenever the
# sweep did not complete.)
if returned_sweep_job.status == "Completed":
    best_run = returned_sweep_job.properties["best_child_run_id"]

    # Point the model at the artifacts written by the best trial.
    model = Model(
        path=f"azureml://jobs/{best_run}/outputs/artifacts/paths/sklearn-iris-flower-classify-model/",
        name="run-model-example",
        description="Model created from run.",
        type="custom_model",
    )

    # Register the model in the workspace.
    registered_model = ml_client.models.create_or_update(model=model)
else:
    # Nothing to register; keep the name bound so later code can test for it.
    registered_model = None
    print(f"Sweep job status: {returned_sweep_job.status}. Please wait until it completes")

# Second sweep example: continuous and discrete search spaces for
# learning_rate, keep_probability and batch_size.
# NOTE(review): `command_job` is not defined anywhere in the visible file; it
# must be a command job exposing these three inputs -- confirm where it comes from.
command_job_for_sweep = command_job(
    learning_rate=Normal(mu=10, sigma=3),
    keep_probability=Uniform(min_value=0.05, max_value=0.1),
    batch_size=Choice(values=[16, 32, 64, 128]),
)

# Build the sweep from the parameterized job.
# NOTE(review): unlike the iris sweep earlier in this file, this call passes no
# primary_metric or goal -- confirm the call is valid without them.
# This rebinds `sweep_job`, replacing the iris sweep object.
sweep_job = command_job_for_sweep.sweep(
    compute="cpu-cluster",
    sampling_algorithm="random",
    # Further sweep settings (metric, goal, trial limits) would be added here.
)

# Submit the sweep; this rebinds `returned_sweep_job` as well.
returned_sweep_job = ml_client.create_or_update(sweep_job)

# Create or update an online endpoint, passing local=True.
# NOTE(review): neither `endpoint` nor `endpoint_name` is defined anywhere in
# the visible file before this point -- both must exist before this runs.
ml_client.online_endpoints.begin_create_or_update(endpoint, local=True)

# Send the sample request file to the endpoint (again with local=True).
ml_client.online_endpoints.invoke(
    endpoint_name=endpoint_name,
    request_file="../model-1/sample-request.json",
    local=True,
)

# Fetch the endpoint by name (rebinding `endpoint`) and read its scoring URI.
endpoint = ml_client.online_endpoints.get(endpoint_name, local=True)
scoring_uri = endpoint.scoring_uri


### The Azure AutoML version

In [None]:
import pandas as pd
import numpy as np
from azureml.core import Workspace, Dataset, Experiment
from azureml.train.automl import AutoMLConfig
from azureml.automl.core.forecasting_parameters import ForecastingParameters
import logging
from azureml.widgets import RunDetails

# Workspace handle loaded from the local configuration; the experiment below
# is created against this handle.
ws = Workspace.from_config()

# Experiment under which the AutoML run is submitted.
experiment_name = "automlethtime"
experiment = Experiment(workspace=ws, name=experiment_name)

# Hard-coded identifiers for a second, explicitly constructed workspace handle.
# NOTE(review): the dataset is read through `workspace` while the experiment
# uses `ws` -- confirm both refer to the same workspace.
subscription_id = "d7f39349-a66b-446e-aba6-0053c2cf1c11"
resource_group = "aml-quickstarts-165287"
workspace_name = "quick-starts-ws-165287"
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

# Look up the registered dataset named 'ethdata'.
dataset = Dataset.get_by_name(workspace, name="ethdata")

# Materialize the registered dataset as a pandas DataFrame.
df = dataset.to_pandas_dataframe()

# Positional split: the first 1250 rows train the model, the last 60 rows are
# held out for testing.
# NOTE(review): the two slices are contiguous and non-overlapping only if the
# frame has exactly 1310 rows -- confirm against the dataset.
train_data, test_data = df.iloc[:1250], df.iloc[-60:]

# Name of the target column.
label = "Close"

# Remove the target column from the test frame and keep its values separately.
test_labels = test_data.pop(label).values

# Time-series settings for the forecasting task. The horizon of 60 equals the
# number of rows held out as test data above.
forecasting_parameters = ForecastingParameters(
    time_column_name="Column1",
    time_series_id_column_names=None,  # no grouping column; set one if the data holds several series
    frequency="H",
    forecast_horizon=60,
    target_lags="auto",
    target_rolling_window_size=10,
)

# AutoML run configuration for the forecasting experiment.
automl_config = AutoMLConfig(
    # What to learn and how to score it.
    task="forecasting",
    primary_metric="normalized_root_mean_squared_error",
    training_data=train_data,
    label_column_name=label,
    n_cross_validations=5,
    forecasting_parameters=forecasting_parameters,
    # Run budget and stopping behaviour.
    experiment_timeout_hours=0.25,  # raise this if the run needs more time
    enable_early_stopping=True,
    enable_ensembling=False,
    # Diagnostics.
    verbosity=logging.INFO,
    model_explainability=True,
    # NOTE(review): confirm that `output_action` is a setting AutoMLConfig
    # actually accepts; its effect is not evident from this file.
    output_action="append_row",
)

# Launch the AutoML run under the experiment.
# NOTE(review): the config names no compute target, so despite the variable
# name this may not execute remotely -- confirm.
remote_run = experiment.submit(automl_config)

# Display the run-details widget for the submitted run.
details_widget = RunDetails(remote_run)
details_widget.show()

# Retrieve the best child run together with its fitted model, then its metrics.
best_run, fitted_model = remote_run.get_output()
best_run_metrics = best_run.get_metrics()

# Dump every metric of the best run, one per line.
for metric_name, metric_value in best_run_metrics.items():
    print(metric_name, metric_value)

# Register the best run's model file under the name 'eth_auto', tagging it as
# AutoML-trained and recording the best run's primary-metric value as a property.
best_nrmse = best_run_metrics["normalized_root_mean_squared_error"]
best_run.register_model(
    model_name="eth_auto",
    model_path="outputs/model.pkl",
    tags={"Training context": "Auto ML"},
    properties={"normalized_root_mean_squared_error": best_nrmse},
)
