In [1]:
import os
import subprocess
import sys
from json import load
from re import sub

from pathlib import Path
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv

# Load environment variables from the project's .env file.
# The file is addressed relative to the notebook's working directory (one
# level up, the same convention used for ../create_infra.sh and ../src), so
# the notebook no longer depends on one user's absolute local path.
load_dotenv(Path("..") / ".env")


def deploy_infrastructure(rg_name, ws_name, location):
    """
    Run the ``create_infra.sh`` shell script that creates the Azure infrastructure.

    The script is looked up one directory above the current working directory
    and executed through ``bash`` with the three arguments passed positionally.

    Args:
        rg_name: Resource group name (first script argument).
        ws_name: Workspace name (second script argument).
        location: Azure location (third script argument).

    Raises:
        SystemExit: With code 1 when an argument is missing or empty, when the
            script file does not exist, when ``bash`` cannot be launched, or
            when the script finishes with a non-zero exit code.
    """
    # Callers typically pass os.getenv() results, which are None for unset
    # variables. Passing None to subprocess.run raises an opaque TypeError,
    # so reject missing values up front with an actionable message.
    missing = [
        label
        for label, value in (
            ("rg_name", rg_name),
            ("ws_name", ws_name),
            ("location", location),
        )
        if not value
    ]
    if missing:
        print(f"Erro: argumentos ausentes ou vazios: {', '.join(missing)}.")
        sys.exit(1)

    script_path = os.path.abspath("../create_infra.sh")
    if not os.path.exists(script_path):
        print(f"Error: Script not found at {script_path}")
        sys.exit(1)
    print(f"Script found at: {script_path}")

    print(f"Python: Iniciando deploy para {ws_name}...")

    try:
        # Invoke the shell script with the arguments.
        # check=True raises CalledProcessError if the script fails.
        subprocess.run(
            ["bash", script_path, rg_name, ws_name, location], check=True, text=True
        )
        print("Python: Deploy finalizado com sucesso!")

    except subprocess.CalledProcessError as e:
        print(f"Erro: O script shell falhou com código {e.returncode}.")
        sys.exit(1)
    except FileNotFoundError:
        # The script's existence was verified above, so the file that could
        # not be found here is the `bash` executable itself.
        print(
            "Erro: O interpretador 'bash' não foi encontrado no PATH; "
            f"não foi possível executar '{script_path}'."
        )
        sys.exit(1)


# Read the deployment parameters from the environment in one pass, then hand
# them to the deploy helper in the order it expects.
rg, ws, lc = (
    os.getenv(env_key) for env_key in ("RESOURCE_GROUP", "WORKSPACE_NAME", "LOCATION")
)

deploy_infrastructure(rg, ws, lc)

Script found at: c:\Users\edmar\MeusProjetos\Data_Science\Certifications\Microsoft DP100\create_infra.sh
Python: Iniciando deploy para mlw-dp100-labs...
Erro: O script shell falhou com código 127.


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [2]:
# 1. Authentication (uses the local az cli login, or Managed Identity in the cloud)
credential = DefaultAzureCredential()

# Workspace coordinates come from environment variables.
subscription_id, resource_group, workspace_name, location = (
    os.getenv(env_key)
    for env_key in ("SUBSCRIPTION_ID", "RESOURCE_GROUP", "WORKSPACE_NAME", "LOCATION")
)

# 2. Connect to the workspace
client_settings = {
    "credential": credential,
    "subscription_id": subscription_id,
    "resource_group_name": resource_group,
    "workspace_name": workspace_name,
}
ml_client = MLClient(**client_settings)

print(f"Conectado ao workspace: {ml_client.workspace_name}")

Class DeploymentTemplateOperations: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Conectado ao workspace: mlw-dp100-labs


In [3]:
from azure.ai.ml.entities import AmlCompute

# Name assigned to the compute cluster
cpu_compute_target = "aml-cluster"

try:
    # Reuse the compute target if it already exists in the workspace
    cpu_cluster = ml_client.compute.get(cpu_compute_target)
    print(
        f"You already have a cluster named {cpu_compute_target}, we'll reuse it as is."
    )

# NOTE(review): any failure of the lookup (not only "cluster not found") ends
# up here and triggers a create attempt — consider narrowing the exception.
except Exception:
    print("Creating a new cpu compute target...")

    # Build the Azure ML compute object with the intended parameters
    cpu_cluster = AmlCompute(
        name=cpu_compute_target,
        # Azure ML Compute is the on-demand VM service
        type="amlcompute",
        # VM Family
        size="STANDARD_DS11_V2",
        # Minimum running nodes when there is no job running
        min_instances=0,
        # Nodes in cluster
        max_instances=1,
        # How many seconds a node keeps running after a job terminates
        idle_time_before_scale_down=120,
        # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination
        tier="Dedicated",
    )

    # begin_create_or_update starts a long-running operation and returns a
    # poller; .result() waits for it to finish and yields the compute object,
    # so cpu_cluster holds the same kind of value in both branches.
    cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster).result()

You already have a cluster named aml-cluster, we'll reuse it as is.


In [4]:
# Fetch the cluster by name and report its configured node ceiling.
cpu_cluster = ml_client.compute.get("aml-cluster")

cluster_summary = f"AMLCompute with name {cpu_cluster.name} has a maximum of {cpu_cluster.max_instances} nodes"
print(cluster_summary)

AMLCompute with name aml-cluster has a maximum of 1 nodes


## Create a script to train a model

To train a model, you'll first create the **training_with_cluster.py** script in the `../src` directory (the folder the job uploads through `code="../src"`). The script uses the **diabetes.csv** file.

```python
from pathlib import Path

import numpy as np

# import libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split

# load the diabetes dataset
# The job runs this script on the remote compute cluster, where a local
# Windows path does not exist. Resolve the CSV relative to this script instead.
# Assumes diabetes.csv sits next to this script in the uploaded code folder —
# TODO confirm.
print("Loading Data...")
data_path = Path(__file__).resolve().parent / "diabetes.csv"
diabetes = pd.read_csv(data_path)

# separate features and labels
X, y = (
    diabetes[
        [
            "Pregnancies",
            "PlasmaGlucose",
            "DiastolicBloodPressure",
            "TricepsThickness",
            "SerumInsulin",
            "BMI",
            "DiabetesPedigree",
            "Age",
        ]
    ].values,
    diabetes["Diabetic"].values,
)

# split data into training set (70%) and test set (30%); fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=0
)

# set regularization hyperparameter
reg = 0.01

# train a logistic regression model (C is the inverse of the regularization rate)
print("Training a logistic regression model with regularization rate of", reg)
model = LogisticRegression(C=1 / reg, solver="liblinear").fit(X_train, y_train)

# calculate accuracy: fraction of test predictions that match the labels
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print("Accuracy:", acc)

# calculate AUC from the predicted probability of the positive class (column 1)
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:, 1])
print("AUC: " + str(auc))
```

## Run a job on a compute cluster

Now, you're ready to run the job on the compute cluster you created.

> **Note**:
> The job will take some time to start as the compute cluster will need to scale from zero to one node. Once the compute cluster is ready, the script will be run. When the job has finished, the compute cluster will scale back down to zero nodes. You can review the compute cluster's status in the **Compute** page.

In [6]:
from azure.ai.ml import command

# Describe the command job: the code folder to upload, the command to run,
# the environment to run it in, and the compute target.
job_settings = dict(
    code="../src",
    command="python training_with_cluster.py",
    environment="AzureML-sklearn-1.5@latest",
    compute="aml-cluster",
    display_name="diabetes-train-cluster",
    experiment_name="diabetes-training",
)
job = command(**job_settings)

# Submit the job and print the Studio link for monitoring it.
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print("Monitor your job at", aml_url)

Uploading src (0.53 MBs): 100%|##########| 529108/529108 [00:01<00:00, 333428.41it/s]




Monitor your job at https://ml.azure.com/runs/joyful_rail_slcyc90vmb?wsid=/subscriptions/534bbb2d-e11d-400a-86cf-17d823e7e559/resourcegroups/azure-machine-learning-path/workspaces/mlw-dp100-labs&tid=86b31d04-0222-4e0f-adb6-c46fdd36e439


## Agora utilizando Environments:
---

Note that all curated environments have names that begin with **AzureML-** (you can't use this prefix for your own environments).

To review a specific environment, you can retrieve it by its name and version. For example, you can retrieve the *description* and *tags* of a curated environment, such as the one listed in the output above:

In [5]:
# Ask the workspace client for its environments and print each one's name.
envs = ml_client.environments.list()
for env in envs:
    # Only the name is shown here; other details are fetched per environment later.
    print(env.name)

AzureML-ACPT-pytorch-1.13-py38-cuda11.7-gpu


In [10]:
# Look up one specific environment by name and version, then show its
# description and tags.
curated_env_name = "AzureML-ACPT-pytorch-1.13-py38-cuda11.7-gpu"
env = ml_client.environments.get(curated_env_name, version=1)
print(env.description, env.tags)

Recommended environment for Deep Learning in public preview with PyTorch on Azure containing the Azure ML SDK with the latest compatible versions of Ubuntu, Python, PyTorch, CUDA\RocM, combined with optimizers like ORT Training,+DeepSpeed+MSCCL+ORT MoE and more. {'PyTorch': '1.13.0', 'GPU': 'Cuda11', 'OS': 'Ubuntu20.04', 'Training': ''}


If a curated environment doesn't include all the Python packages you need to run your script, you can create your own custom environment. By listing all necessary packages in an environment, you can easily re-run your scripts. All the dependencies are stored in the environment which you can then specify in the job configuration, independent of the compute you use.

For example, you can create an environment simply from a Docker image. Certain frameworks like PyTorch will have a public Docker image that already includes everything you need. 

Let's create an environment from a Docker image:

In [11]:
from azure.ai.ml.entities import Environment

# Define an environment that consists solely of a public Docker image.
docker_env_spec = {
    "image": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    "name": "docker-image-example",
    "description": "Environment created from a Docker image.",
}
env_docker_image = Environment(**docker_env_spec)

# Register it in the workspace; the returned object is the cell's displayed result.
ml_client.environments.create_or_update(env_docker_image)

Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': 'mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04', 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'docker-image-example', 'description': 'Environment created from a Docker image.', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/534bbb2d-e11d-400a-86cf-17d823e7e559/resourceGroups/azure-machine-learning-path/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-labs/environments/docker-image-example/versions/1', 'Resource__source_path': '', 'base_path': 'c:\\Users\\edmar\\MeusProjetos\\Data_Science\\Certifications\\Microsoft DP100\\notebooks', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x000002608EE24B00>, 'serialize': <msrest.serialization.Serializer object at 0x000002608EDC7D20>, 'version': '1', 'conda_file': None, 'build': None, 

In [12]:
from azure.ai.ml import command

# Same training command as before, this time pointed at version 1 of the
# custom Docker-image environment.
job_settings = {
    "code": "../src",
    "command": "python training_with_cluster.py",
    "environment": "docker-image-example:1",
    "compute": "aml-cluster",
    "display_name": "diabetes-train-custom-env",
    "experiment_name": "diabetes-training",
}
job = command(**job_settings)

# Submit the job and print where to follow its progress.
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print("Monitor your job at", aml_url)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


Monitor your job at https://ml.azure.com/runs/brave_ear_ytbyfxnq1b?wsid=/subscriptions/534bbb2d-e11d-400a-86cf-17d823e7e559/resourcegroups/azure-machine-learning-path/workspaces/mlw-dp100-labs&tid=86b31d04-0222-4e0f-adb6-c46fdd36e439


The job you just submitted will fail. When it has failed, review its error message: it will tell you that there is no module named pandas. There are two possible causes for such an error:

- The script uses pandas but didn't import the library (`import pandas as pd`). 
- The script does import the library at the top of the script but the compute didn't have the library installed (`pip install pandas`).

After reviewing the `training_with_cluster.py` script you can observe the script is correct, which means the library wasn't installed. In other words, the environment didn't include the necessary packages.

Let's create a new environment, using the base Docker image used in the previous job. Now, you'll add a conda specification to ensure the necessary packages will be installed. First, save the following conda specification as `../src/conda-env.yml`, the path the environment definition below points to:

``` yaml
name: basic-env-cpu
channels:
  - conda-forge
dependencies:
  - python=3.11
  - scikit-learn
  - pandas
  - numpy
  - matplotlib
```

In [13]:
from azure.ai.ml.entities import Environment

# Define an environment from the same base Docker image, plus a conda
# specification file that lists the extra packages.
conda_env_spec = {
    "image": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    "conda_file": "../src/conda-env.yml",
    "name": "docker-image-plus-conda-example",
    "description": "Environment created from a Docker image plus Conda environment.",
}
env_docker_conda = Environment(**conda_env_spec)

# Register it in the workspace; the returned object is the cell's displayed result.
ml_client.environments.create_or_update(env_docker_conda)

Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': 'mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04', 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'docker-image-plus-conda-example', 'description': 'Environment created from a Docker image plus Conda environment.', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/534bbb2d-e11d-400a-86cf-17d823e7e559/resourceGroups/azure-machine-learning-path/providers/Microsoft.MachineLearningServices/workspaces/mlw-dp100-labs/environments/docker-image-plus-conda-example/versions/1', 'Resource__source_path': '', 'base_path': 'c:\\Users\\edmar\\MeusProjetos\\Data_Science\\Certifications\\Microsoft DP100\\notebooks', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x000002608ED6D9D0>, 'serialize': <msrest.serialization.Serializer object at 0x000002608D86FD20>, 'versi

Note that all necessary dependencies are included in the conda specification file for the script to run successfully.

The environment above was created from the base Docker image **and** the conda specification file, which adds the necessary dependencies. Azure Machine Learning builds the conda environment on top of the Docker image you provided. Now submit the job again, this time using the new environment:

In [14]:
from azure.ai.ml import command

# Re-run the training command against version 1 of the Docker + conda environment.
job = command(
    experiment_name="diabetes-training",
    display_name="diabetes-train-custom-env",
    compute="aml-cluster",
    environment="docker-image-plus-conda-example:1",
    command="python training_with_cluster.py",
    code="../src",
)

# Submit the job, keep its Studio URL, and print it for monitoring.
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print("Monitor your job at", aml_url)

[32mUploading src (0.53 MBs): 100%|##########| 529244/529244 [00:01<00:00, 324495.23it/s]
[39m



Monitor your job at https://ml.azure.com/runs/bright_tongue_c57mv0n7mv?wsid=/subscriptions/534bbb2d-e11d-400a-86cf-17d823e7e559/resourcegroups/azure-machine-learning-path/workspaces/mlw-dp100-labs&tid=86b31d04-0222-4e0f-adb6-c46fdd36e439
