# Install Python packages

In [None]:
%pip -q install -U dotenv azure-ai-ml azure-identity azureml-mlflow mlflow

# Load environment variables from a .env file
secret 노출을 피하고 notebook 들간의 일관된 환경변수를 설정하기 위해 `dotenv` 을 이용한다.

In [None]:
import os
from dotenv import load_dotenv

# Load the variables defined in the local .env file into the process
# environment; override=True asks dotenv to replace values that are already set.
load_dotenv(override=True)

# Coordinates of the Azure Machine Learning workspace used throughout the
# notebook. Each value is None when the variable is not defined.
AZURE_AML_SUBSCRIPTION_ID = os.environ.get("AZURE_AML_SUBSCRIPTION_ID")
AZURE_AML_RESOURCE_GROUP = os.environ.get("AZURE_AML_RESOURCE_GROUP")
AZURE_AML_WORKSPACE = os.environ.get("AZURE_AML_WORKSPACE")

# Create an Azure Machine Learning client
Azure Machine Learning 의 Client 객체인 `MLClient` 을 생성한다. 본 예제는 Azure CLI 로그인 Credential 을 사용하고 있다. 터미널에서 `az login` 을 정상적으로 완료하여야 한다. `az` 명령어가 설치되어 있지 않다면 [Azure CLI 설치하는 방법](https://learn.microsoft.com/ko-kr/cli/azure/install-azure-cli?view=azure-cli-latest) 을 참고한다.

In [None]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Client bound to the workspace identified by the environment variables above.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=AZURE_AML_SUBSCRIPTION_ID,
    resource_group_name=AZURE_AML_RESOURCE_GROUP,
    workspace_name=AZURE_AML_WORKSPACE,
)

# Create Azure Machine Learning workspace and registry clients
Azure Machine Learning 의 Client 객체인 `MLClient` 을 생성한다. 본 예제는 Azure CLI 로그인 Credential 을 사용하고 있다. 터미널에서 `az login` 을 정상적으로 완료하여야 한다. `az` 명령어가 설치되어 있지 않다면 [Azure CLI 설치하는 방법](https://learn.microsoft.com/ko-kr/cli/azure/install-azure-cli?view=azure-cli-latest) 을 참고한다.

In [None]:
import time
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Client for the user's own workspace (compute, data, jobs, models, endpoints).
workspace_ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=AZURE_AML_SUBSCRIPTION_ID,
    resource_group_name=AZURE_AML_RESOURCE_GROUP,
    workspace_name=AZURE_AML_WORKSPACE,
)
# Client for the "azureml" registry, from which the foundation model and the
# fine-tuning pipeline component are fetched.
registry_ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=AZURE_AML_SUBSCRIPTION_ID,
    resource_group_name=AZURE_AML_RESOURCE_GROUP,
    registry_name="azureml",
)

experiment_name = "funtuning-mmdetection-yolof-coco"
# Seconds since the epoch, as text; used later as the registered model version.
timestamp = f"{int(time.time())}"

# Model Fine Tuning
AML model registry 에 등록되어 있는 모델을 사용하여 fine tuning 을 해본다.

## Create compute

In [None]:
from azure.ai.ml.entities import AmlCompute
from azure.core.exceptions import ResourceNotFoundError

# Names of the two clusters used by the fine-tuning pipeline.
cpu_cluster_name = "cpu-cluster-001"
gpu_cluster_name = "gpu-cluster-001"

# Settings that differ per cluster; everything else is shared in the loop below.
cluster_overrides = {
    cpu_cluster_name: {"size": "Standard_D12_v2"},
    gpu_cluster_name: {"size": "Standard_NC6s_v3", "tier": "LowPriority"},
}

# Reuse a cluster when it already exists in the workspace, otherwise create it
# and block until provisioning has finished.
for cluster_name, overrides in cluster_overrides.items():
    try:
        _ = workspace_ml_client.compute.get(cluster_name)
    except ResourceNotFoundError:
        print(f"Creating a new compute target ({cluster_name})...")
        compute_config = AmlCompute(
            name=cluster_name,
            type="amlcompute",
            idle_time_before_scale_down=120,
            min_instances=0,
            max_instances=4,
            **overrides,
        )
        workspace_ml_client.begin_create_or_update(compute_config).result()
    else:
        print(f"Found existing compute target ({cluster_name}).")

In [None]:
# Determine how many GPUs a node of the fine-tuning cluster has; the pipeline
# passes this value as `process_count_per_instance`.
# The VM size is read from the cluster itself (gpu_cluster_name) rather than
# from a hard-coded SKU, so the count matches the compute the job runs on even
# when an already existing cluster with a different size is reused.
gpu_compute = workspace_ml_client.compute.get(gpu_cluster_name)
gpu_vm_size = gpu_compute.size

gpus_per_node = 0
gpu_count_found = False
for compute_sku in workspace_ml_client.compute.list_sizes():
    if compute_sku.name.lower() == gpu_vm_size.lower():
        gpus_per_node = compute_sku.gpus
        gpu_count_found = True
        break  # SKU names are matched exactly, so the first hit is the answer

# Fail early instead of submitting a job with zero processes per node.
if not gpu_count_found:
    raise ValueError(
        f"Number of GPUs for compute '{gpu_cluster_name}' (size {gpu_vm_size}) "
        "could not be determined from the sizes available in this workspace."
    )
print(f"Number of GPUs per node of {gpu_cluster_name} ({gpu_vm_size}): {gpus_per_node}")

In [None]:
# Name of the model in the MMDetection model zoo and its counterpart in the
# Azure ML registry.
mmdetection_model_name = "yolof_r50_c5_8x8_1x_coco"
aml_registry_model_name = "mmd-3x-yolof_r50_c5_8x8_1x_coco"

# Pick the registry entry with the numerically highest version.
registry_model_versions = registry_ml_client.models.list(name=aml_registry_model_name)
foundation_model = max(
    registry_model_versions,
    key=lambda model_version: int(model_version.version),
)

## Dataset preparation

In [None]:
from IPython.display import Image

# Local location of the fridge-objects dataset used for fine tuning.
dataset_parent_dir = "./resources/training-mmd-object-detection"
dataset_dir = dataset_parent_dir + "/data/odFridgeObjects"

# Image used as the running example; shown inline as the cell result.
sample_image = os.path.join(dataset_dir, "images", "31.jpg")
Image(filename=sample_image)

In [None]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Describe the local dataset folder as a URI-folder data asset and create or
# update it in the workspace.
my_data = Data(
    name="fridge-items-images-object-detection",
    description="Fridge-items images Object detection",
    type=AssetTypes.URI_FOLDER,
    path=dataset_dir,
)
uri_folder_data_asset = workspace_ml_client.data.create_or_update(my_data)

# Show the registered asset and the path it resolved to.
print(uri_folder_data_asset)
print()
print("Path to folder in Blob Storage:")
print(uri_folder_data_asset.path)

### Convert annotation file to JSONL

In [None]:
import json
import os
import xml.etree.ElementTree as ET

# Convert the Pascal VOC style XML annotations of the dataset into two JSON
# Lines files (training / validation), one JSON object per image, with
# bounding boxes normalized to the [0, 1] range by the image size.

# Each JSONL file is written into the MLTable folder that will reference it.
training_mltable_path = os.path.join(dataset_parent_dir, "data/training-mltable-folder")
validation_mltable_path = os.path.join(dataset_parent_dir, "data/validation-mltable-folder")

# Create the folders if they don't exist
os.makedirs(training_mltable_path, exist_ok=True)
os.makedirs(validation_mltable_path, exist_ok=True)

# Every `train_validation_ratio`-th entry of the annotation folder goes to the
# validation set; all others go to the training set.
train_validation_ratio = 5

# Path to the training and validation files
train_annotations_file = os.path.join(training_mltable_path, "train_annotations.jsonl")
validation_annotations_file = os.path.join(
    validation_mltable_path, "validation_annotations.jsonl"
)

# Template of one JSON line; a fresh dictionary is built from it per image so
# the template (including its nested "image_details") is never mutated.
json_line_sample = {
    "image_url": uri_folder_data_asset.path,
    "image_details": {"format": None, "width": None, "height": None},
    "label": [],
}

# Path to the annotations
annotations_folder = os.path.join(dataset_dir, "annotations")

# Read each annotation and convert it to a JSONL line.
with open(train_annotations_file, "w", encoding="utf-8") as train_f, open(
    validation_annotations_file, "w", encoding="utf-8"
) as validation_f:
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # train/validation split reproducible across runs and machines.
    for i, filename in enumerate(sorted(os.listdir(annotations_folder))):
        if not filename.endswith(".xml"):
            print(f"Skipping unknown file: {filename}")
            continue

        annotation_filename = os.path.join(annotations_folder, filename)
        if i % 100 == 0:
            print(f"Parsing {annotation_filename}")

        root = ET.parse(annotation_filename).getroot()
        width = int(root.find("size/width").text)
        height = int(root.find("size/height").text)

        # One label per <object>; pixel coordinates are divided by the image
        # size, and the VOC "difficult" flag is stored as "isCrowd".
        labels = [
            {
                "label": voc_object.find("name").text,
                "topX": float(voc_object.find("bndbox/xmin").text) / width,
                "topY": float(voc_object.find("bndbox/ymin").text) / height,
                "bottomX": float(voc_object.find("bndbox/xmax").text) / width,
                "bottomY": float(voc_object.find("bndbox/ymax").text) / height,
                "isCrowd": int(voc_object.find("difficult").text),
            }
            for voc_object in root.findall("object")
        ]

        # Build the JSON line for this image.
        # NOTE(review): "images/" is appended without a separator, which
        # assumes the data asset path ends with "/" - confirm.
        image_filename = root.find("filename").text
        _, file_extension = os.path.splitext(image_filename)
        json_line = {
            **json_line_sample,
            "image_url": json_line_sample["image_url"] + "images/" + image_filename,
            "image_details": {
                "format": file_extension[1:],  # extension without the leading dot
                "width": width,
                "height": height,
            },
            "label": labels,
        }

        # Index 0, ratio, 2 * ratio, ... are validation annotations, the rest training.
        target_f = validation_f if i % train_validation_ratio == 0 else train_f
        target_f.write(json.dumps(json_line) + "\n")
                
# Alternative route: generate a JSONL annotation file from a COCO-format JSON
# file by running the coco2jsonl.py helper script shipped under resources/.
# NOTE(review): this prefix appends "/images/" to the data asset path, whereas
# the VOC conversion earlier in this cell appends "images/" without a leading
# slash. One of the two presumably yields a malformed URL - confirm whether
# the asset path ends with "/".
base_url = uri_folder_data_asset.path + "/images/"

# `$base_url` below is filled in by IPython from the Python variable above
# before the command is handed to the shell.
!python resources/training-mmd-object-detection/coco2jsonl.py \
--input_coco_file_path "./resources/training-mmd-object-detection/odFridgeObjects_coco.json" \
--output_dir "./resources/training-mmd-object-detection/data/odFridgeObjects" \
--output_file_name "odFridgeObjects_from_coco.jsonl" \
--task_type "ObjectDetection" \
--base_url $base_url

In [None]:
def create_ml_table_file(filename):
    """Return the text of an MLTable definition that loads one JSON Lines file.

    The definition points at ``./<filename>`` (relative to the MLTable file),
    reads it as UTF-8 JSON lines, and converts the ``image_url`` column to the
    ``stream_info`` column type.

    Args:
        filename: Name of the JSONL file located next to the MLTable file.

    Returns:
        The MLTable file contents as a string without a trailing newline.
    """
    mltable_lines = [
        "paths:",
        f"  - file: ./{filename}",
        "transformations:",
        "  - read_json_lines:",
        "        encoding: utf8",
        "        invalid_lines: error",
        "        include_path_column: false",
        "  - convert_column_types:",
        "      - columns: image_url",
        "        column_type: stream_info",
    ]
    return "\n".join(mltable_lines)

def save_ml_table_file(output_path, mltable_file_contents):
    """Write *mltable_file_contents* to a file named ``MLTable`` in *output_path*.

    Args:
        output_path: Existing directory that receives the ``MLTable`` file.
        mltable_file_contents: Text to store in the file; an existing file is
            overwritten.
    """
    mltable_file_path = os.path.join(output_path, "MLTable")
    with open(mltable_file_path, "w") as mltable_file:
        mltable_file.write(mltable_file_contents)

# Build the MLTable definitions; each one references its JSONL file by bare
# file name because the MLTable file is stored in the same folder.
train_mltable_file_contents = create_ml_table_file(
    os.path.basename(train_annotations_file)
)
validation_mltable_file_contents = create_ml_table_file(
    os.path.basename(validation_annotations_file)
)

# Save the training MLTable, then the validation MLTable.
save_ml_table_file(training_mltable_path, train_mltable_file_contents)
save_ml_table_file(validation_mltable_path, validation_mltable_file_contents)

## Submit the fine tuning job using predefined component

In [None]:

from azure.ai.ml.dsl import pipeline
from azure.ai.ml.entities import PipelineComponent
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes

# Fetch the latest version of the MMDetection fine-tuning pipeline component
# from the registry.
mmdetection_component_name = "mmdetection_image_objectdetection_instancesegmentation_pipeline"
pipeline_component_mmdetection_func = registry_ml_client.components.get(
    name=mmdetection_component_name,
    label="latest",
)

# Optional DeepSpeed configuration: None is passed to the component when the
# file does not exist locally.
deepspeed_config_path = os.path.join(dataset_parent_dir, "deepspeed_configs/zero1.json")
deepspeed_config_missing = not os.path.exists(deepspeed_config_path)
if deepspeed_config_missing:
    print("DeepSpeed config file not found")
    deepspeed_config_path = None

# Value passed to the component's `instance_count` input.
instance_count = 1

# Model name used in log messages and in the display/model names derived later.
# Only one of "mlflow_model" or "model_name" may be given in the arguments below.
use_model_name = aml_registry_model_name

# Arguments forwarded as keyword arguments to the pipeline component.
pipeline_component_args = {
    # # Model import args
    "model_family": "MmDetectionImage",
    "download_from_source": False,  # True for downloading a model directly from MMDetection
    # Pass foundation_model.id, not the model object: passing the object fails with
    # "UserErrorException: only path input is supported now but get: ..."
    "mlflow_model": foundation_model.id,
    # "model_name": mmdetection_model_name, # specify the model_name instead of mlflow_model if you want to use a model from the mmdetection model zoo
    # Finetune args
    "task_name": "image-object-detection",
    "apply_augmentations": True,
    "number_of_workers": 8,
    "apply_deepspeed": False,
    "deepspeed_config": deepspeed_config_path,
    "apply_ort": False,
    "auto_find_batch_size": False,
    "extra_optim_args": "",
    "precision": "32",
    "random_seed": 42,
    "evaluation_strategy": "epoch",
    "evaluation_steps": 500,
    "logging_strategy": "epoch",
    "logging_steps": 500,
    "save_strategy": "epoch",
    "save_steps": 500,
    "save_total_limit": -1,
    "early_stopping": False,
    "early_stopping_patience": 1,
    "resume_from_checkpoint": False,
    "save_as_mlflow_model": True,
    # # Uncomment one or more lines below to provide specific values if you wish to override the auto-selected default values.
    # "image_min_size": -1,
    # "image_max_size": -1,
    # "metric_for_best_model": "mean_average_precision",
    # "number_of_epochs": 15,
    # "max_steps": -1,
    # "training_batch_size": 4,
    # "validation_batch_size": 4,
    # "learning_rate": 5e-5,
    # "learning_rate_scheduler": "warmup_linear",
    # "warmup_steps": 0,
    # "optimizer": "adamw_hf",
    # "weight_decay": 0.0,
    # "gradient_accumulation_step": 1,
    # "max_grad_norm": 1.0,
    # "iou_threshold": 0.5,
    # "box_score_threshold": 0.3,
    # # Model evaluation args
    # The following parameters map to the dataset fields
    # Uncomment one or more lines below to provide specific values if you wish to override the auto-selected default values.
    # "label_column_name": "label",
    # "input_column_names": "image_url",
}

print(f"Finetuning model {use_model_name}")


# Pipeline definition: wraps the registry's MMDetection fine-tuning component
# in a pipeline with a single node and exposes the fine-tuned MLflow model as
# the pipeline output "trained_model".
# NOTE(review): documented with comments rather than a docstring, and the local
# variable name is left untouched, because the @pipeline decorator may use
# either of them when building the job - confirm before changing.
@pipeline()
def create_pipeline_mmdetection():
    # Model import runs on the CPU cluster; fine tuning and evaluation run on the GPU cluster.
    mmdetection_pipeline_component: PipelineComponent = pipeline_component_mmdetection_func(
        compute_model_import=cpu_cluster_name,
        compute_finetune=gpu_cluster_name,
        compute_model_evaluation=gpu_cluster_name,
        training_data=Input(type=AssetTypes.MLTABLE, path=training_mltable_path),
        validation_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),
        # test data
        # Using the same data for validation and test. If you want to use a different dataset for test, specify it below
        test_data=Input(type=AssetTypes.MLTABLE, path=validation_mltable_path),
        instance_count=instance_count,
        # Number of processes per instance, set to the GPU count looked up earlier.
        process_count_per_instance=gpus_per_node,
        **pipeline_component_args,
    )


    return {
        # Map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model. Registering the model is required to deploy the model to an online or batch endpoint.
        "trained_model": mmdetection_pipeline_component.outputs.mlflow_model_folder,
    }
    
# Instantiate the pipeline job from the definition.
created_pipeline = create_pipeline_mmdetection()

# Run every step again instead of reusing cached results from previous jobs,
# and do not continue with other steps once a step has failed.
created_pipeline.settings.force_rerun = True
created_pipeline.settings.continue_on_step_failure = False

created_pipeline.display_name = f"{use_model_name}_mmdetection_pipeline_component_run_od"

print("Submitting pipeline")
mmdetection_pipeline_run = workspace_ml_client.jobs.create_or_update(
    created_pipeline,
    experiment_name=experiment_name,
)
print(f"Pipeline created. URL: {mmdetection_pipeline_run.studio_url}")

# Stream the job logs into the notebook output.
workspace_ml_client.jobs.stream(mmdetection_pipeline_run.name)

JobException: The output streaming for the run interrupted.
But the run is still executing on the compute target. 
Details for canceling the run can be found here: https://aka.ms/aml-docs-cancel-run

## Register the model

In [None]:
import mlflow
from mlflow.tracking.client import MlflowClient

# Point MLflow at the tracking URI reported by the current workspace.
current_workspace = workspace_ml_client.workspaces.get(
    name=workspace_ml_client.workspace_name
)
MLFLOW_TRACKING_URI = current_workspace.mlflow_tracking_uri
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
print(f"\nCurrent tracking uri: {mlflow.get_tracking_uri()}")

# MLflow client created after the tracking URI has been configured.
mlflow_client = MlflowClient()

In [None]:
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

# Register the fine-tuned model produced by the pipeline job in the workspace.

# Reference the "trained_model" output of the submitted pipeline job. The job
# is addressed by its name (`mmdetection_pipeline_run.name`), not by the
# display name that was assigned to the pipeline.
model_path_from_job = (
    f"azureml://jobs/{mmdetection_pipeline_run.name}/outputs/trained_model"
)
print(f"Path to register model: {model_path_from_job}")

# "/" is replaced with "-" when deriving the registered model name.
safe_model_name = use_model_name.replace("/", "-")
finetuned_model_name = f"{safe_model_name}-fridge-objects-od"
finetuned_model_description = (
    f"{safe_model_name} fine tuned model for fridge objects object detection"
)
prepare_to_register_model = Model(
    path=model_path_from_job,
    type=AssetTypes.MLFLOW_MODEL,
    name=finetuned_model_name,
    version=timestamp,  # Use timestamp as version to avoid version conflict
    description=finetuned_model_description,
)
print(f"Prepare to register model: \n{prepare_to_register_model}")

# Register the model from pipeline job output
registered_model = workspace_ml_client.models.create_or_update(
    prepare_to_register_model
)
print(f"Registered model: {registered_model}")

## Deploy the model

In [None]:
import datetime
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, OnlineRequestSettings, ProbeSettings

# Endpoint names need to be unique in a region, so a month/day/hour/minute
# stamp is appended to the name.
endpoint_stamp = datetime.datetime.now().strftime("%m%d%H%M")
online_endpoint_name = f"mmd-od-fridge-items-{endpoint_stamp}"
online_endpoint_description = (
    f"Online endpoint for {registered_model.name}, "
    "finetuned for fridge objects object detection"
)

# Create the online endpoint with key-based authentication and wait for it.
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description=online_endpoint_description,
    auth_mode="key",
)
workspace_ml_client.begin_create_or_update(endpoint).result()

deployment_name = "mmd-od-fridge-mlflow-deploy"
print(registered_model.id, online_endpoint_name, deployment_name, sep="\n")

# Request handling and health-probe settings of the deployment.
deployment_request_settings = OnlineRequestSettings(
    max_concurrent_requests_per_instance=1,
    request_timeout_ms=90000,
    max_queue_wait_ms=500,
)
deployment_liveness_probe = ProbeSettings(
    failure_threshold=49,
    success_threshold=1,
    timeout=299,
    period=180,
    initial_delay=180,
)
deployment_readiness_probe = ProbeSettings(
    failure_threshold=10,
    success_threshold=1,
    timeout=10,
    period=10,
    initial_delay=10,
)

# Create the deployment that serves the registered model and wait for it.
demo_deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=online_endpoint_name,
    model=registered_model.id,
    instance_type="Standard_DS3_V2",
    instance_count=1,
    request_settings=deployment_request_settings,
    liveness_probe=deployment_liveness_probe,
    readiness_probe=deployment_readiness_probe,
)
workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()

# Route all endpoint traffic to the new deployment.
endpoint.traffic = {deployment_name: 100}
workspace_ml_client.begin_create_or_update(endpoint).result()

In [None]:
import base64
import json

def read_image(image_path):
    """Return the raw bytes of the file at *image_path*."""
    with open(image_path, mode="rb") as image_file:
        image_bytes = image_file.read()
    return image_bytes

# Get the details for online endpoint
endpoint = workspace_ml_client.online_endpoints.get(name=online_endpoint_name)

# Score `sample_image`: the visualization cell draws the returned boxes on
# that same image, so the request and the plot must use one file.
encoded_image = base64.encodebytes(read_image(sample_image)).decode("utf-8")

# Write the request payload (one base64-encoded image in the "image" column)
# to a JSON file, because the endpoint is invoked with a request file.
request_file_name = "sample_request_data.json"
with open(request_file_name, "w") as request_file:
    json.dump(
        {
            "input_data": {
                "columns": ["image"],
                "data": [encoded_image],
            }
        },
        request_file,
    )

response = workspace_ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name=deployment_name,
    request_file=request_file_name,
)
# Shown as the cell result; the next cell parses it with json.loads.
response

In [None]:
%matplotlib inline
import json
import numpy as np
from PIL import Image
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches


# Load the sample image; the PIL image is only used to read its size, which
# PIL reports as (width, height).
img_np = mpimg.imread(sample_image)
img = Image.fromarray(img_np.astype("uint8"), "RGB")
image_width, image_height = img.size
conf_threshold = 0.6  # only detections with a score above this are drawn

# Figure with the annotated image on top and a score bar chart below (4:1).
fig = plt.figure(figsize=(12, 12))
gs = fig.add_gridspec(2, 1, height_ratios=[4, 1], hspace=0.2)
ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1])

ax1.imshow(img_np)
ax1.axis("off")

# Detections of the first (only) image in the response, best score first.
detections = json.loads(response)
sorted_data = sorted(
    detections[0]["boxes"],
    key=lambda detection: detection["score"],
    reverse=True,
)
sorted_scores = []
sorted_colors = []
unique_labels = []
label_counter = {}

# Draw a box and a numbered label for each detection above the threshold.
for detect in sorted_data:
    label = detect["label"]
    box = detect["box"]
    conf_score = detect["score"]

    if not conf_score > conf_threshold:
        continue

    # Number repeated labels ("milk 1", "milk 2", ...) so each one is unique.
    label_counter[label] = label_counter.get(label, 0) + 1
    current_label = f"{label} {label_counter[label]}"
    unique_labels.append(current_label)

    # Box coordinates come as fractions of the image size; scale to pixels.
    ymin = box["topY"]
    xmin = box["topX"]
    ymax = box["bottomY"]
    xmax = box["bottomX"]
    topleft_x = image_width * xmin
    topleft_y = image_height * ymin
    box_width = image_width * (xmax - xmin)
    box_height = image_height * (ymax - ymin)
    print(
        f"{current_label}: [{round(topleft_x, 3)}, {round(topleft_y, 3)}, "
        f"{round(box_width, 3)}, {round(box_height, 3)}], {round(conf_score, 3)}"
    )

    # A random RGB color per detection, shared by its box, label and bar.
    color = np.random.rand(3)
    ax1.add_patch(
        patches.Rectangle(
            (topleft_x, topleft_y),
            box_width,
            box_height,
            linewidth=2,
            edgecolor=color,
            facecolor="none",
        )
    )
    ax1.text(topleft_x, topleft_y - 10, current_label, color=color, fontsize=20)
    sorted_scores.append(conf_score)
    sorted_colors.append(color)

sns.set_palette("pastel")

# Horizontal bar chart of the scores, one bar per drawn detection.
barplot = sns.barplot(x=sorted_scores, y=unique_labels, palette=sorted_colors, ax=ax2)
ax2.set_xlabel("")  # no x-axis label
ax2.set_ylabel("")  # no y-axis label
ax2.set_title(f"Top {len(sorted_scores)} Object Scores", fontsize=12)

# Print each score just past the end of its bar.
for bar_index, bar_score in enumerate(sorted_scores):
    barplot.text(
        bar_score + 0.01,
        bar_index,
        f"{bar_score:.2f}",
        va="center",
        color="black",
        fontsize=10,
    )

# Strip spines, ticks, the x-axis and the grid from the bar chart.
for spine_side in ("left", "top", "right", "bottom"):
    barplot.spines[spine_side].set_visible(False)
barplot.tick_params(left=False, top=False, right=False, bottom=False)
barplot.xaxis.set_visible(False)
barplot.yaxis.grid(False)

# Light gray figure background.
fig.patch.set_facecolor("#F7F7F7")

plt.tight_layout()
# fig.savefig("plot.png", bbox_inches="tight")
plt.show()

In [None]:
# Fetch the latest version of the fine-tuning pipeline component from the
# registry and dump its definition to a local YAML file for inspection.
mmdetection_pipeline_name = "mmdetection_image_objectdetection_instancesegmentation_pipeline"
pipeline_comp = registry_ml_client.components.get(name=mmdetection_pipeline_name, label="latest")
pipeline_comp.dump(dest="./mmdet_pipeline.yaml")