**End to end Machine learning project from training to deployment**

We start by importing the necessary packages and authenticating with Azure ML. `DefaultAzureCredential` automatically handles authentication in different environments, such as Azure notebooks or local machines where the Azure CLI is logged in.

In [None]:
# Credential provider from the Azure identity library
from azure.identity import DefaultAzureCredential

# Client class used later in the notebook to talk to the workspace
from azure.ai.ml import MLClient

# Create the credential object once; it is passed to MLClient when the
# workspace handle is built
credential = DefaultAzureCredential()

**Explanation**:

If DefaultAzureCredential does not work, we can use InteractiveBrowserCredential to manually authenticate via a browser popup.

In [None]:
# Handle to the workspace
# from azure.ai.ml import MLClient

# Authentication package
# from azure.identity import InteractiveBrowserCredential
# credential = InteractiveBrowserCredential()

**Connect to Azure ML workspace**

Explanation:
We create an MLClient instance to interact with our workspace. This client allows us to register datasets, models, environments, and submit pipeline jobs.

In [None]:
# Coordinates that identify the target Azure ML workspace
workspace_coordinates = {
    "subscription_id": "cb51c13f-04b1-4395-8005-e4f9f2b5e397",
    "resource_group_name": "mlresources",
    "workspace_name": "MLAssessment",
}

# Workspace handle built from the credential and the coordinates above
ml_client = MLClient(credential=credential, **workspace_coordinates)

**Create dependencies directory**

Explanation:
We create a folder to store environment dependency files like conda.yaml.

In [None]:
import os

# Folder, relative to the notebook, that holds environment definition files
dependencies_dir = "../dependencies"

# Create the folder; exist_ok=True makes re-running this cell harmless
os.makedirs(name=dependencies_dir, exist_ok=True)

**Define environment dependencies**

Explanation:
We define a conda.yaml file listing Python packages and pip dependencies required for training and inference.

In [None]:
%%writefile {dependencies_dir}/conda.yaml
# Conda environment specification; the %%writefile magic above stores it as
# conda.yaml inside the dependencies folder.
name: model-env
channels:
  - conda-forge
dependencies:
  # Packages resolved by conda
  - python=3.8
  - numpy=1.21.2
  - pip=21.2.4
  - scikit-learn=0.24.2
  - scipy=1.7.1
  - pandas>=1.1,<1.2
  # Packages installed with pip
  - pip:
    - inference-schema[numpy-support]==1.3.0
    - xlrd==2.0.1
    # NOTE(review): there is a stray space after '==' in the mlflow pin below
    - mlflow== 1.26.1
    - azureml-mlflow==1.42.0
    - psutil>=5.8,<5.9
    - tqdm>=4.59,<4.60
    - ipykernel~=6.0
    # NOTE(review): matplotlib is the only unpinned package in this list
    - matplotlib

In [None]:
from azure.ai.ml.entities import Environment

custom_env_name = "aml-scikit-learnv2"

# Local description of the environment: a base Docker image combined with the
# conda.yaml file stored in the dependencies folder
environment_spec = Environment(
    name=custom_env_name,
    description="Custom environment for Credit Card Defaults pipeline",
    tags={"scikit-learn": "0.24.2"},
    conda_file=os.path.join(dependencies_dir, "conda.yaml"),
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest",
)

# Register (or update) the environment in the workspace and keep the returned
# entity, whose name and version are reported below
pipeline_job_env = ml_client.environments.create_or_update(environment_spec)

registration_message = (
    f"Environment with name {pipeline_job_env.name} is registered to workspace, "
    f"the environment version is {pipeline_job_env.version}"
)
print(registration_message)

**Prepare source code folder**

Explanation:
We create a folder for our training scripts.

In [None]:
# Folder that holds the training script(s) written by the next cell
train_src_dir = "../src"

# Create the folder if it is missing; exist_ok=True keeps the cell re-runnable
os.makedirs(train_src_dir, exist_ok=True)

**Write main training script**

**Explanation:**
The training script main.py handles:

- Loading input data

- Preprocessing

- Training a GradientBoostingClassifier

- Logging metrics with MLflow

- Saving and registering the trained model

In [None]:
%%writefile {train_src_dir}/main.py
import os
import argparse
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

def main():
    """Main function of the script."""

    # input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str, help="path to input data")
    parser.add_argument("--test_train_ratio", type=float, required=False, default=0.25)
    parser.add_argument("--n_estimators", required=False, default=100, type=int)
    parser.add_argument("--learning_rate", required=False, default=0.1, type=float)
    parser.add_argument("--registered_model_name", type=str, help="model name")
    args = parser.parse_args()
   
    # Start Logging
    mlflow.start_run()

    # enable autologging
    mlflow.sklearn.autolog()

    ###################
    #<prepare the data>
    ###################
    print(" ".join(f"{k}={v}" for k, v in vars(args).items()))

    print("input data:", args.data)
    
    credit_df = pd.read_excel(args.data, header=1, index_col=0)

    mlflow.log_metric("num_samples", credit_df.shape[0])
    mlflow.log_metric("num_features", credit_df.shape[1] - 1)

    train_df, test_df = train_test_split(
        credit_df,
        test_size=args.test_train_ratio,
    )
    ####################
    #</prepare the data>
    ####################

    ##################
    #<train the model>
    ##################
    # Extracting the label column
    y_train = train_df.pop("default payment next month")

    # convert the dataframe values to array
    X_train = train_df.values

    # Extracting the label column
    y_test = test_df.pop("default payment next month")

    # convert the dataframe values to array
    X_test = test_df.values

    print(f"Training with data of shape {X_train.shape}")

    clf = GradientBoostingClassifier(
        n_estimators=args.n_estimators, learning_rate=args.learning_rate
    )
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    print(classification_report(y_test, y_pred))
    ###################
    #</train the model>
    ###################

    ##########################
    #<save and register model>
    ##########################
    # Registering the model to the workspace
    print("Registering the model via MLFlow")
    mlflow.sklearn.log_model(
        sk_model=clf,
        registered_model_name=args.registered_model_name,
        artifact_path=args.registered_model_name,
    )

    # Saving the model to a file
    mlflow.sklearn.save_model(
        sk_model=clf,
        path=os.path.join(args.registered_model_name, "trained_model"),
    )
    ###########################
    #</save and register model>
    ###########################
    
    # Stop Logging
    mlflow.end_run()

if __name__ == "__main__":
    main()

**Get data asset**

Explanation:
We retrieve the dataset registered in Azure ML to use for training.

In [None]:
# Identify the registered data asset by name and version
data_asset_name = "default_credit_card_dataset"
data_asset_version = "1"

# Fetch the asset from the workspace and show the path it points to
data_asset = ml_client.data.get(name=data_asset_name, version=data_asset_version)
print(data_asset.path)

**Define training step**

Explanation:
We define a command step in Azure ML that runs our training script. This step is later used in a pipeline.

In [None]:
from azure.ai.ml import command
from azure.ai.ml import Input, Output

registered_model_name = "credit_defaults_model"

# Values made available to the command line through ${{inputs.<name>}}
step_inputs = {
    "data": Input(type="uri_file", path=data_asset.path),
    "test_train_ratio": 0.2,
    "learning_rate": 0.25,
    "registered_model_name": registered_model_name,
}

# Folder output made available through ${{outputs.model_output}}
step_outputs = {"model_output": Output(type="uri_folder")}

# NOTE(review): the command runs main2.py, whereas the %%writefile cell in this
# notebook creates main.py - confirm that main2.py exists in the code folder
# and accepts every argument passed here.
step_command = (
    "python main2.py"
    " --data ${{inputs.data}}"
    " --test_train_ratio ${{inputs.test_train_ratio}}"
    " --learning_rate ${{inputs.learning_rate}}"
    " --registered_model_name ${{inputs.registered_model_name}}"
    " --model_output ${{outputs.model_output}}"
)

train_step = command(
    inputs=step_inputs,
    outputs=step_outputs,
    code="../src/",  # location of source code
    command=step_command,
    environment="aml-scikit-learnv2@latest",
    experiment_name="train_model_credit_default_prediction",
    display_name="credit_default_prediction",
)

In [None]:
# ml_client.create_or_update(job)

**Define pipeline**

Explanation:
We create a pipeline that runs the training step. Pipelines allow us to chain multiple steps, e.g., training → evaluation → deployment.

In [None]:
from azure.ai.ml import command, Input, dsl
from azure.ai.ml.entities import PipelineJob


# Pipeline definition: a single training node whose model folder is exposed
# as the pipeline-level output "model_output".
@dsl.pipeline(
    compute="cpu-cluster1",  # replace with your cluster name
    description="Pipeline for credit default prediction",
)
def credit_default_pipeline():
    # Instantiate the training step defined in the previous section.
    # NOTE(review): the SDK may derive the node name from this local variable
    # name - keep it stable unless that is confirmed not to matter.
    train_job = train_step()
    # later you can add more steps like:
    # evaluate_job = evaluate_component(inputs=...)
    # evaluate_job.run_after(train_job)
    # Expose the training node's output folder under the key "model_output"
    return {"model_output": train_job.outputs.model_output}

# Build the pipeline job object by calling the decorated pipeline function
pipeline_job: PipelineJob = credit_default_pipeline()

# Submit the job to the workspace
submitted_job = ml_client.jobs.create_or_update(pipeline_job)
print(f"Pipeline job submitted: {submitted_job.name}")

# Submission returns immediately; stream the logs and wait for the run to end
# so that the model is registered before the deployment cells look it up.
ml_client.jobs.stream(submitted_job.name)

**Create online endpoint**

Explanation:
We create a managed online endpoint for real-time model inference. We also generate a unique name to avoid naming conflicts.

In [None]:
import uuid

# Append the first 8 hexadecimal characters of a random UUID so that the
# endpoint name does not clash with an existing one
endpoint_suffix = uuid.uuid4().hex[:8]
online_endpoint_name = f"credit-endpoint-{endpoint_suffix}"

In [None]:
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
)

# Local description of the managed online endpoint (key-based authentication)
endpoint_spec = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="this is an online endpoint",
    auth_mode="key",
    tags={
        "training_dataset": "credit_defaults",
        "model_type": "sklearn.GradientBoostingClassifier",
    },
)

# Start the create/update operation, then call result() on the returned
# object to obtain the endpoint entity
creation_operation = ml_client.online_endpoints.begin_create_or_update(endpoint_spec)
endpoint = creation_operation.result()

print(f"Endpoint {endpoint.name} provisioning state: {endpoint.provisioning_state}")

**Retrieve endpoint**

Explanation:
Retrieve the online endpoint to verify its provisioning state and confirm it’s ready for deployments.

In [None]:
# Fetch the endpoint back from the workspace by name
endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)

# Report the name and provisioning state of the retrieved endpoint
retrieval_message = (
    f'Endpoint "{endpoint.name}" with provisioning state '
    f'"{endpoint.provisioning_state}" is retrieved'
)
print(retrieval_message)

**Deploy model**

Explanation:
We pick the latest registered model version and deploy it to the managed endpoint.

In [None]:
# Let's pick the latest version of the model
latest_model_version = max(
    [int(m.version) for m in ml_client.models.list(name=registered_model_name)]
)

print(latest_model_version)

In [None]:
# Resolve the registered model entity for the latest version found earlier
model = ml_client.models.get(name=registered_model_name, version=latest_model_version)

# Settings of the "blue" deployment: which endpoint it belongs to, what it
# serves, and the compute it runs on
deployment_settings = {
    "name": "blue",
    "endpoint_name": online_endpoint_name,
    "model": model,
    "environment": "aml-scikit-learnv2@latest",
    "instance_type": "Standard_DS3_v2",
    "instance_count": 1,
}
blue_deployment = ManagedOnlineDeployment(**deployment_settings)

# Start the deployment and call result() to obtain the deployed entity
deployment_operation = ml_client.begin_create_or_update(blue_deployment)
blue_deployment = deployment_operation.result()