
# Clase 12: Predicciones
## DP-100 Azure Data Scientist / DSRP
## Clase 2 Octubre

### Dataset Utilizado: Hotel Booking

https://www.kaggle.com/code/touba7/hotel-booking

In [17]:
from azure.ai.ml import MLClient, Input
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

# Name of the registered model that both deployments in this notebook serve.
MODEL_NAME = "dsrp-booking-model"

# Workspace client; `from_config` locates the workspace config file
# (the captured output reports the file it found).
ml_client = MLClient.from_config(
    credential=DefaultAzureCredential(),
)

# Resolve the model through the "latest" label and display it.
model = ml_client.models.get(
    name=MODEL_NAME,
    label="latest",
)
model

Found the config file in: /config.json


Model({'job_name': 'dsrp-machine-learning-job-6', 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'dsrp-booking-model', 'description': None, 'tags': {'alias': 'CHAMPION'}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/models/dsrp-booking-model/versions/2', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/test1-dsrp-compute/code/Users/miguel.arquez12/dsrp-azure-data-scientist-course/notebooks', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7fb33d1d72e0>, 'serialize': <msrest.serialization.Serializer object at 0x7fb33d1d7790>, 'version': '2', 'latest_version': None, 'path': 'azureml://subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/workspaces

# 1. Batch Deployment

In [26]:
import uuid

from azure.ai.ml.entities import (
    BatchEndpoint,
    ModelBatchDeployment,
    ModelBatchDeploymentSettings,
    Model,
    AmlCompute,
    Data,
    BatchRetrySettings,
    CodeConfiguration,
    Environment,
)
from azure.ai.ml.constants import AssetTypes, BatchDeploymentOutputAction


# Append the first 8 characters of a random UUID so that re-running the cell
# produces a fresh endpoint name instead of colliding with an earlier one.
batch_endpoint_name = f"b-booking-endpoint-{str(uuid.uuid4())[:8]}"

endpoint = BatchEndpoint(
    description="Modelo de prediccion de cancelaciones",
    name=batch_endpoint_name,
)

# Block until the service finishes creating the endpoint; the returned
# BatchEndpoint is displayed as the cell output.
ml_client.batch_endpoints.begin_create_or_update(endpoint).result()

BatchEndpoint({'scoring_uri': 'https://b-booking-endpoint-f2b8e991.eastus.inference.ml.azure.com/jobs', 'openapi_uri': None, 'provisioning_state': 'Succeeded', 'name': 'b-booking-endpoint-f2b8e991', 'description': 'Modelo de prediccion de cancelaciones', 'tags': {}, 'properties': {'BatchEndpointCreationApiVersion': '2023-10-01', 'azureml.onlineendpointid': '/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/batchEndpoints/b-booking-endpoint-f2b8e991'}, 'print_as_yaml': False, 'id': '/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/batchEndpoints/b-booking-endpoint-f2b8e991', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/test1-dsrp-compute/code/Users/miguel.arquez12/dsrp-azure-data-scientist-course/notebooks', 'creation_context'

### Compute cluster

In [15]:
# Compute target used by the batch deployment. It is created only when no
# compute with this name is already listed in the workspace.
compute_name = "dsrp-cluster"

if not any(target.name == compute_name for target in ml_client.compute.list()):
    compute_cluster = AmlCompute(
        name=compute_name,
        description="amlcompute",
        size="Standard_D1",
        min_instances=0,
        max_instances=1,
    )
    # Wait for provisioning to finish before moving on.
    ml_client.begin_create_or_update(compute_cluster).result()

### Deployment

In [32]:
# Retry policy applied by the batch deployment.
batch_retry_settings = BatchRetrySettings(max_retries=3, timeout=300)

# Scoring settings. With APPEND_ROW the predictions are collected into a
# single output file, `predictions.csv`, which a later cell downloads and reads.
batch_settings = ModelBatchDeploymentSettings(
    instance_count=1,
    max_concurrency_per_instance=2,
    mini_batch_size=10,
    output_action=BatchDeploymentOutputAction.APPEND_ROW,
    output_file_name="predictions.csv",
    retry_settings=batch_retry_settings,
    logging_level="info",
)

# Deployment of the registered model under the batch endpoint, running on the
# cluster named by `compute_name`.
deployment = ModelBatchDeployment(
    name="booking-dsp",
    description="DSRP predicciones de cancelaciones de hoteles",
    endpoint_name=endpoint.name,
    model=model,
    compute=compute_name,
    settings=batch_settings,
)

# Block until the deployment is created; the result is displayed as cell output.
ml_client.batch_deployments.begin_create_or_update(deployment).result()

BatchDeployment({'provisioning_state': 'Succeeded', 'endpoint_name': 'b-booking-endpoint-f2b8e991', 'type': None, 'name': 'booking-dsp', 'description': 'DSRP predicciones de cancelaciones de hoteles', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/batchEndpoints/b-booking-endpoint-f2b8e991/deployments/booking-dsp', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/test1-dsrp-compute/code/Users/miguel.arquez12/dsrp-azure-data-scientist-course/notebooks', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7fb33c5bf130>, 'serialize': <msrest.serialization.Serializer object at 0x7fb33c5bf400>, 'model': '/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml

### Prueba de Batch deployment

In [22]:
# Fetch version "2" of the registered data asset that is scored in batch,
# then display it.
data_asset = ml_client.data.get(
    name="gold-booking-dsrp",
    version="2",
)
data_asset

Data({'path': 'azureml://subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourcegroups/aml-course-dp100-2024/workspaces/dsrp-aml-dp100/datastores/workspaceblobstore/paths/LocalUpload/655f680f08ec61bf09b93086507db61b/feature_engineering_data_PROCESSED.csv', 'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_file', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'gold-booking-dsrp', 'description': 'Tabla Final Feature Engineering', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/data/gold-booking-dsrp/versions/2', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/test1-dsrp-compute/code/Users/miguel.arquez12/dsrp-azure-data-scientist-course/notebooks', 'creation_context': <azure.ai.ml.entit

In [33]:
# Wrap the data asset as a single-file job input. The variable is named
# `batch_input` rather than `input` so that the builtin `input()` is not
# shadowed for the rest of the kernel session.
batch_input = Input(type=AssetTypes.URI_FILE, path=data_asset.id)

# Submit the batch scoring job to the deployment created above. `job` is the
# handle later cells use to locate the child scoring job.
job = ml_client.batch_endpoints.invoke(
    endpoint_name=endpoint.name,
    deployment_name=deployment.name,
    input=batch_input,
)

In [35]:
# The invoked batch job is a parent job; take the first job listed under it
# and display it.
child_jobs = list(ml_client.jobs.list(parent_job_name=job.name))
scoring_job = child_jobs[0]
scoring_job

Experiment,Name,Type,Status,Details Page
21a5a163-82e6-4c9c-8601-20e1938e6f1a,294a67f1-a865-4214-9744-90142e4cead7,base,Completed,Link to Azure Machine Learning studio


In [36]:
# Download the "score" named output of the scoring job under the current
# directory (the captured output shows it landing in named-outputs/score).
ml_client.jobs.download(
    name=scoring_job.name,
    output_name="score",
    download_path=".",
)

Downloading artifact azureml://datastores/workspaceblobstore/paths/azureml/294a67f1-a865-4214-9744-90142e4cead7/score/ to named-outputs/score


In [40]:
import pandas as pd

# Load the downloaded predictions. Column names are supplied explicitly via
# `names`, so the file is read as having no header row.
score = pd.read_csv(
    filepath_or_buffer="named-outputs/score/predictions.csv",
    names=["row", "prediction", "file"],
)
score

Unnamed: 0,row,prediction,file
0,0,1,feature_engineering_data_PROCESSED.csv
1,1,1,feature_engineering_data_PROCESSED.csv
2,2,0,feature_engineering_data_PROCESSED.csv
3,3,1,feature_engineering_data_PROCESSED.csv
4,4,1,feature_engineering_data_PROCESSED.csv
...,...,...,...
119385,119385,1,feature_engineering_data_PROCESSED.csv
119386,119386,1,feature_engineering_data_PROCESSED.csv
119387,119387,0,feature_engineering_data_PROCESSED.csv
119388,119388,1,feature_engineering_data_PROCESSED.csv


# 2. Online Deployment

In [41]:
# import required libraries
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
)


In [43]:
# Creating a unique endpoint name with current datetime to avoid conflicts
import datetime

# Timestamp suffix (month, day, hour, minute, microseconds) keeps the endpoint
# name unique across runs.
online_endpoint_name = f"endpoint-{datetime.datetime.now():%m%d%H%M%f}"

# Define the managed online endpoint with key-based authentication.
# NOTE: this rebinds `endpoint`, which previously held the BatchEndpoint; the
# batch cells that read `endpoint.name` must not be re-run after this cell.
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    auth_mode="key",
    description="this is a sample online endpoint for mlflow model",
    tags={"foo": "bar"},
)

# Block until the endpoint is created; the result is displayed as cell output.
ml_client.begin_create_or_update(endpoint).result()


ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://endpoint-10140227990523.eastus.inference.ml.azure.com/score', 'openapi_uri': 'https://endpoint-10140227990523.eastus.inference.ml.azure.com/swagger.json', 'name': 'endpoint-10140227990523', 'description': 'this is a sample online endpoint for mlflow model', 'tags': {'foo': 'bar'}, 'properties': {'createdBy': 'miguel arquez abdala', 'createdAt': '2024-10-14T02:27:31.947663+0000', 'lastModifiedAt': '2024-10-14T02:27:31.947663+0000', 'azureml.onlineendpointid': '/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourcegroups/aml-course-dp100-2024/providers/microsoft.machinelearningservices/workspaces/dsrp-aml-dp100/onlineendpoints/endpoint-10140227990523', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/oeidp:4b014ae2-0df9-417b-a95d

In [44]:
# Deployment named "blue" that serves the registered model on a single
# Standard_F4s_v2 instance behind the online endpoint created above.
blue_deployment = ManagedOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=model,
    instance_count=1,
    instance_type="Standard_F4s_v2",
)

# Block until the deployment finishes; the result is displayed as cell output.
ml_client.online_deployments.begin_create_or_update(blue_deployment).result()


Check: endpoint endpoint-10140227990523 exists


..............................................................................

ManagedOnlineDeployment({'private_network_connection': None, 'package_model': False, 'provisioning_state': 'Succeeded', 'endpoint_name': 'endpoint-10140227990523', 'type': 'Managed', 'name': 'blue', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/odidp:4b014ae2-0df9-417b-a95d-74c60a07cf6a:c4019d6a-399c-43f4-a366-eb37327b56d4?api-version=2023-04-01-preview'}, 'print_as_yaml': False, 'id': '/subscriptions/0f61d6bf-ab3d-4df7-a666-edaf42eff57c/resourceGroups/aml-course-dp100-2024/providers/Microsoft.MachineLearningServices/workspaces/dsrp-aml-dp100/onlineEndpoints/endpoint-10140227990523/deployments/blue', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/test1-dsrp-compute/code/Users/miguel.arquez12/dsrp-azure-data-scientist-course/notebooks', 'creation_context': 

## Test online deployment

In [71]:
import json
import pandas as pd

# Positional index of the single row sent to the online endpoint as a smoke test.
INDEX_TEST = 345

# NOTE(review): this cell reads version "5" of the data asset while the batch
# test cell reads version "2" — confirm which version is intended.
data_asset = ml_client.data.get("gold-booking-dsrp", version="5")
modeling_dataframe = pd.read_csv(data_asset.path)

# Everything except `is_canceled` (presumably the prediction target — confirm).
# Built once and reused for both the column list and the row values, instead
# of dropping the column twice.
features = modeling_dataframe.drop("is_canceled", axis=1)

# Request document with the keys "columns", "data" and "index" nested under
# "input_data", describing exactly one row.
_input_file = {
    "input_data": {
        "columns": features.columns.to_list(),
        "data": [features.iloc[INDEX_TEST].to_list()],
        "index": [INDEX_TEST],
    }
}

# Persist the payload so it can be passed to the endpoint as `request_file`.
with open('online-deployment-dsrp.json', 'w') as f:
    json.dump(_input_file, f)

In [72]:
# Score the saved request file against the "blue" deployment through the SDK;
# the response is displayed as the cell output.
ml_client.online_endpoints.invoke(
    request_file="online-deployment-dsrp.json",
    deployment_name="blue",
    endpoint_name=online_endpoint_name,
)

'[0]'

In [62]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally disable server certificate verification for HTTPS clients.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment variable
    is unset or empty, and ``ssl`` exposes ``_create_unverified_context``, the
    module-wide default HTTPS context factory is replaced with the unverified
    one. In every other case nothing is changed.

    Args:
        allowed: Whether bypassing certificate verification is permitted.

    Returns:
        None.
    """
    if not allowed:
        return
    # A non-empty PYTHONHTTPSVERIFY means the setting is left alone.
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        # Process-wide change: affects every later default HTTPS context.
        ssl._create_default_https_context = unverified_factory

# Certificate verification stays enabled so the endpoint key is never sent
# over an unverified TLS connection. Pass True only when the scoring service
# really uses a self-signed certificate.
allowSelfSignedHttps(False)

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = _input_file

body = json.dumps(data).encode("utf-8")

# Resolve the scoring URI from the endpoint created in this run instead of
# hard-coding the URL of a previous run: `online_endpoint_name` carries a
# timestamp and therefore changes on every execution.
url = ml_client.online_endpoints.get(name=online_endpoint_name).scoring_uri

# The endpoint was created with auth_mode="key", so fetch its primary key
# through the SDK rather than pasting a secret into the notebook.
api_key = ml_client.online_endpoints.get_keys(name=online_endpoint_name).primary_key
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")


headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))


b'[0]'
