# 1 Resources and Assets

## 1.1 Workspace

### 1.1.1 Get Workspace

In [None]:
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Workspace
from azure.identity import DefaultAzureCredential
import pandas as pd

# Build the client from the workspace configuration file, authenticating with
# the default Azure credential chain.
ml_client = MLClient.from_config(
    DefaultAzureCredential()
)

# Fetch the workspace the client is bound to so its metadata can be shown.
workspace = ml_client.workspaces.get(name=ml_client.workspace_name)

# Collect the identifying details of the workspace. The subscription ID is
# read from the client's public property rather than from a private attribute
# of the connections operations object.
output = {
    "Workspace": ml_client.workspace_name,
    "Subscription ID": ml_client.subscription_id,
    "Resource Group": workspace.resource_group,
    "Location": workspace.location,
}

# Show full cell contents instead of truncating long values.
pd.set_option("display.max_colwidth", None)
outputDf = pd.DataFrame(data=output, index=[""])
# Transpose so each detail is displayed on its own row.
outputDf.T

### 1.1.2 Create Workspace

In [None]:
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Workspace
from azure.identity import DefaultAzureCredential
import json

# Load the settings of the workspace to create from a JSON file.
# NOTE(review): ".../ws-details.json" looks like a placeholder path - confirm
# the real location of the file before running this cell.
with open(".../ws-details.json", encoding="utf-8") as f:
    ws_details = json.load(f)

workspace_name = ws_details["workspace_name"]
subscription_id = ws_details["subscription_id"]
resource_group = ws_details["resource_group"]
workspace_region = ws_details["workspace_region"]

# get a handle to the subscription
ml_client = MLClient(DefaultAzureCredential(), subscription_id, resource_group)

# specify the workspace details
ws = Workspace(
    name=workspace_name,
    # Use the region read from the details file instead of a hard-coded one.
    location=workspace_region,
    display_name=workspace_name,
    description="This example shows how to create a workspace",
    tags=dict(purpose="demo"),
)

# Start the creation; the returned poller is displayed as the cell output.
ml_client.workspaces.begin_create(ws)

## 1.2 Compute

### 1.2.1 Create/Get Compute Cluster

In [None]:
from azure.core.exceptions import ResourceNotFoundError
from azure.ai.ml.entities import AmlCompute

cluster_name = "gen-purpose"

try:
    # Retrieve an already attached Azure Machine Learning Compute.
    compute = ml_client.compute.get(cluster_name)
except ResourceNotFoundError:
    # The cluster does not exist yet: describe it and ask the service to
    # create it.
    compute = AmlCompute(
        name=cluster_name,
        size="STANDARD_D16S_V3",
        type="amlcompute",
        min_instances=0,
        max_instances=10,
        idle_time_before_scale_down=120,
    )
    poller = ml_client.begin_create_or_update(compute)
    # Block until provisioning finishes and keep the resource returned by the
    # service, so `compute` is a service-side object in both branches.
    compute = poller.result()

## 1.3 Datastore

### 1.3.1 Get datastore from your workspace

In [None]:
# Look up a datastore of the current workspace by its name.
datastore = ml_client.datastores.get("workspaceblobstore")

# Alternatively, ask the workspace for its default datastore.
datastore = ml_client.datastores.get_default()

### 1.3.2 Create a datastore from Azure Blob container
Refer to [this link](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-resource-datastore).

## 1.4 Data assets
Refer to [this link](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-assets-data).

### 1.4.1 Create MLTable

In [None]:
import yaml
import os
import shutil


def create_folder_and_ml_table(csv_file, output, delimiter=",", encoding="ascii"):
    """Build an MLTable folder around a single delimited text file.

    Creates ``output`` if it does not exist, writes an ``MLTable`` YAML
    definition into it that references the file by relative path and reads it
    with a ``read_delimited`` transformation, then copies ``csv_file`` next to
    that definition.

    Args:
        csv_file: Path of the source delimited file.
        output: Directory that receives the ``MLTable`` file and the copy.
        delimiter: Value written as the ``read_delimited`` delimiter.
        encoding: Value written as the ``read_delimited`` encoding.
    """
    os.makedirs(output, exist_ok=True)
    fname = os.path.basename(csv_file)

    read_step = {"read_delimited": {"delimiter": delimiter, "encoding": encoding}}
    definition = {
        "paths": [{"file": f"./{fname}"}],
        "transformations": [read_step],
    }

    mltable_file = os.path.join(output, "MLTable")
    with open(mltable_file, "w") as handle:
        handle.write(yaml.dump(definition))

    # The definition points at "./<fname>", so the data must sit beside it.
    shutil.copy(csv_file, os.path.join(output, fname))

# Source CSV and the folder that will hold its MLTable definition.
train_data_path = "../data/dominicks_OJ.csv"
train_mltable_path = "../data/training-mltable-folder"

create_folder_and_ml_table(csv_file=train_data_path, output=train_mltable_path)

# The training MLTable is now defined locally, with local data to be uploaded.
# Example of wrapping it as a job input:
# my_training_data_input = Input(type=AssetTypes.MLTABLE, path=train_mltable_path)

### 1.4.2 Use data in a job

In [None]:
from azure.ai.ml import command
from azure.ai.ml.entities import Data
from azure.ai.ml import Input, Output
from azure.ai.ml.constants import AssetTypes

# Possible Asset Types for Data:
# AssetTypes.URI_FILE
# AssetTypes.URI_FOLDER
# AssetTypes.MLTABLE

# Possible Paths for Data:
# Blob: https://<account_name>.blob.core.windows.net/<container_name>/<folder>/<file>
# Datastore: azureml://datastores/<datastore_name>/paths/<folder>/<file>
# Data Asset: azureml:<my_data>:<version>

# Replace "<path>" with one of the path forms listed above.
my_job_inputs = {
    "raw_data": Input(type=AssetTypes.URI_FOLDER, path="<path>")
}

my_job_outputs = {
    "prep_data": Output(type=AssetTypes.URI_FOLDER, path="<path>")
}

job = command(
    code="./src",  # local path where the code is stored
    # The ${{...}} placeholders refer to the keys of the inputs/outputs dicts.
    command="python process_data.py --raw_data ${{inputs.raw_data}} --prep_data ${{outputs.prep_data}}",
    inputs=my_job_inputs,
    outputs=my_job_outputs,
    environment="<environment_name>:<version>",
    # Target the cluster that section 1.2.1 of this notebook creates or reuses.
    compute="gen-purpose",
)

# submit the command
returned_job = ml_client.create_or_update(job)
# get a URL for the status of the job
returned_job.services["Studio"].endpoint

### 1.4.3 Read and write data in jobs
Refer to [this link](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-read-write-data-v2?tabs=python).

Covers:
- Supported paths
- Supported modes

## 1.5 Model assets
Refer to [this link](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-assets-model).

## 1.6 Environment

### 1.6.1 Get environment

In [None]:
# Fetch the version labelled "latest" of an environment by name.
environment = ml_client.environments.get(
    "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu", label="latest"
)

### 1.6.2 Create environment

In [None]:
from azure.ai.ml.entities import Environment

# Folder holding the conda specification, and the name to register under.
dependencies_dir = "../dependencies"
custom_env_name = "aml-scikit-learn"

# Describe the environment: a base image plus the conda dependencies file.
pipeline_job_env = Environment(
    name=custom_env_name,
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest",
    conda_file=os.path.join(dependencies_dir, "conda.yml"),
    description="Custom environment for demo",
    tags={"scikit-learn": "0.24.2"},
)

# Register it; the returned object replaces the local description.
pipeline_job_env = ml_client.environments.create_or_update(pipeline_job_env)

print(
    f"Environment with name {pipeline_job_env.name} is registered to workspace, "
    f"the environment version is {pipeline_job_env.version}"
)

# 2 Training

- [Run a script](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-command-job)
- [Local runs](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-local-runs)
- [AutoML](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-execution-automl)
- [Hyperparameter Tuning](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-execution-hyperdrive)
- Parallel run step
    - [SDK v1 vs SDK v2](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-execution-pipeline)
    - [Example Notebook](https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/pipelines/1g_pipeline_with_parallel_nodes/pipeline_with_parallel_nodes.ipynb)
- Pipelines
    - [SDK v1 vs SDK v2](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-execution-pipeline)
    - [Example Notebook](https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/pipelines/1b_pipeline_with_python_function_components/pipeline_with_python_function_components.ipynb)

# 3 Endpoints

- Online endpoints
    - [SDK v1 vs SDK v2](https://learn.microsoft.com/en-us/azure/machine-learning/migrate-to-v2-deploy-endpoints)
    - [Example](https://github.com/Azure/azureml-examples/tree/main/sdk/python/endpoints/online)
- Batch endpoints
    - [Example](https://github.com/Azure/azureml-examples/tree/main/sdk/python/endpoints/batch)