In [None]:
!pip install azure-ai-ml azure-identity transformers accelerate
!pip install --upgrade jupyter ipywidgets tqdm huggingface_hub transformers

In [51]:
# --- Azure ML workspace coordinates (used to build Workspace / MLClient) ---
target_subscription_id = "6ce80806-9645-4c72-8d3b-50a3455edcfd"
target_resource_group_name = "azure-ml-northus"
target_workspace_name = "denlaiazuremlnorthus"

# --- Source model on Hugging Face and the name/description it is registered under ---
target_huggingface_model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
target_azml_model_name = "deepseek-model-qwen-1o5b"
target_azml_model_desp = "DeepSeek-R1-Distill-Qwen-1.5B for inference"

# --- Serving: environment name, endpoint name, deployment name prefix, VM size ---
target_deepseek_env = "deepseek-env"
target_managed_endpoint_name = "denlai-deepseek-qwen-1o5b"
target_deployment_prefix = "nvidia-t4-4core"
target_GPU_SKU = "Standard_NC4as_T4_v3"

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DockerConfiguration
from azureml.core import Workspace

# Look up the target workspace first; the environment is registered into it below.
ws = Workspace.get(
    subscription_id=target_subscription_id,  # You can omit this if using a config file
    resource_group=target_resource_group_name,
    name=target_workspace_name,
)
print("Workspace loaded:", ws.name)

# Collect the conda and pip requirements for the inference image.
conda_deps = CondaDependencies()
for conda_package in ("python=3.10", "pip"):
    conda_deps.add_conda_package(conda_package)
for pip_package in (
    "torch",
    "transformers",
    "accelerate",
    "sentencepiece",
    "protobuf",
    "azureml-inference-server-http",
):
    conda_deps.add_pip_package(pip_package)

# Describe the environment: Docker base image plus the dependencies above.
env = Environment(name=target_deepseek_env)
env.docker.base_image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04"
env.python.conda_dependencies = conda_deps

# Register the environment in the workspace.
env.register(workspace=ws)



In [None]:
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Build the SDK v2 client scoped to the configured subscription,
# resource group and workspace.
credential = DefaultAzureCredential()
workspace_scope = {
    "subscription_id": target_subscription_id,
    "resource_group_name": target_resource_group_name,
    "workspace_name": target_workspace_name,
}
ml_client = MLClient(credential, **workspace_scope)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Download the model and tokenizer from the Hugging Face Hub and write a local
# copy into ./<target_azml_model_name>, which is the folder uploaded at
# model-registration time.
model_id = target_huggingface_model_id
local_model_dir = f"./{target_azml_model_name}"

# `ignore_mismatched_sizes` is intentionally NOT passed:
#   * it is a model-weight loading option and has no meaning for a tokenizer;
#   * for the model, a checkpoint/architecture size mismatch should raise here
#     rather than be silently tolerated in a copy that is about to be served.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally - keep it only if the chosen model actually requires it.
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

model.save_pretrained(local_model_dir)
tokenizer.save_pretrained(local_model_dir)


In [None]:
from azure.ai.ml.entities import Model

# Upload the locally saved model folder and register it in the workspace.
# The path must be an f-string (prefix outside the quotes) so it resolves to
# ./<target_azml_model_name>, the folder written by save_pretrained().
registered_model = ml_client.models.create_or_update(
    Model(
        path=f"./{target_azml_model_name}",
        name=target_azml_model_name,
        type="custom_model",
        description=target_azml_model_desp
    )
)


In [None]:
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, CodeConfiguration
from datetime import datetime

# 1. Create (or update) the managed online endpoint under the configured name
endpoint_name = target_managed_endpoint_name

endpoint = ManagedOnlineEndpoint(
    name=endpoint_name,
    auth_mode="AADToken"
)

ml_client.begin_create_or_update(endpoint).result()

# 2. Create a unique deployment name using a prefix and timestamp
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
deployment_name = f"{target_deployment_prefix}-{timestamp}"

print("deployment_name:", deployment_name)

# 3. Resolve the most recent registered versions instead of hard-coding ":1".
#    Re-registering the model or environment creates a new version, so a fixed
#    version would keep deploying the oldest assets.
latest_model = ml_client.models.get(name=target_azml_model_name, label="latest")
latest_env = ml_client.environments.get(name=target_deepseek_env, label="latest")
print("model:", f"{latest_model.name}:{latest_model.version}")
print("environment:", f"{latest_env.name}:{latest_env.version}")

deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=endpoint_name,
    model=f"{latest_model.name}:{latest_model.version}",
    instance_type=target_GPU_SKU,  # T4 GPU
    instance_count=1,
    environment=f"{latest_env.name}:{latest_env.version}",
    # The scoring script is expected at ./src/score.py (not part of this notebook)
    code_configuration=CodeConfiguration(code="./src", scoring_script="score.py")
)


# 4. Deploy to the existing endpoint and wait for completion
ml_client.begin_create_or_update(deployment).result()



In [None]:

from azure.ai.ml.entities import ManagedOnlineEndpoint

# Fetch the current endpoint definition so that only routing is changed.
endpoint = ml_client.online_endpoints.get(name=endpoint_name)

# Send all traffic to the new deployment and mark it as the default.
endpoint.traffic = {deployment_name: 100}
endpoint.defaults = {"deployment_name": deployment_name}

# Push the updated routing and wait for the operation to finish.
ml_client.begin_create_or_update(endpoint).result()


In [None]:
import urllib.request
import json
from azure.identity import DefaultAzureCredential

import urllib.error  # explicit: HTTPError is referenced in the except clause below

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = { "prompt": "where does the sun rise?" , "max_new_tokens": 10240, "temperature": 0.9, "top_p": 0.7}
body = json.dumps(data).encode("utf-8")

# Endpoint URL: read it from the endpoint itself so it always matches the
# configured endpoint name and the workspace region.
url = ml_client.online_endpoints.get(name=target_managed_endpoint_name).scoring_uri

# Get token using Microsoft Entra ID
credential = DefaultAzureCredential()
token = credential.get_token("https://ml.azure.com/.default").token

# Set headers with Entra ID token
headers = {
    'Content-Type': 'application/json',
    'Accept': 'application/json',
    'Authorization': f'Bearer {token}'
}

# Create and send request
req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once the body has been read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print("Model response:")
    print(result.decode("utf-8"))
except urllib.error.HTTPError as error:
    print("The request failed with status code:", error.code)
    # The headers and body of the error response help with diagnosing the failure.
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))
