# Deploy a TorchVision ResNet model to Triton Inference Server on AKS

description: In this example, a pretrained ResNet-18 model from TorchVision is exported to TorchScript format. The exported model is then deployed to Triton on AKS.

Please note that this Public Preview release is subject to the [Supplemental Terms of Use for Microsoft Azure Previews](https://azure.microsoft.com/support/legal/preview-supplemental-terms/).

In [None]:
import os

from azureml.core import Workspace

# Workspace coordinates are read from the environment when set; otherwise the
# placeholder strings below must be replaced by hand before running this cell.
subscription_id = os.getenv("SUBSCRIPTION_ID", default="<subscription_id>")
resource_group = os.getenv("RESOURCE_GROUP", default="<resource_group>")
workspace_name = os.getenv("WORKSPACE_NAME", default="<workspace_name>")

# Handle to the workspace; reused by the later registration and deployment cells.
ws = Workspace.get(
    subscription_id=subscription_id,
    resource_group=resource_group,
    name=workspace_name,
)

print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep="\n")

## Download pretrained model
Download the pretrained ResNet-18 model from TorchVision and export it to TorchScript.

In [None]:
import torch
import torchvision.models as models

# We now have an instance of the pretrained model.
r18 = models.resnet18(pretrained=True)

# Switch to inference mode *before* scripting. In the default training mode,
# BatchNorm normalises with per-batch statistics and updates its running
# statistics on every forward pass, which is not what should be exported.
r18.eval()

r18_scripted = torch.jit.script(r18)  # *** This is the TorchScript export

In [None]:
import torch.nn.functional as F

dummy_input = torch.rand(1, 3, 224, 224)  # We should run a quick test

# Run both models in inference mode and without autograd, so that this check
# neither updates BatchNorm running statistics nor builds a gradient graph
# right before the model is saved.
r18.eval()
r18_scripted.eval()
with torch.no_grad():
    unscripted_output = r18(dummy_input)         # Get the unscripted model's prediction...
    scripted_output = r18_scripted(dummy_input)  # ...and do the same for the scripted version

# The top-5 class indices should be identical for both versions.
unscripted_top5 = F.softmax(unscripted_output, dim=1).topk(5).indices
scripted_top5 = F.softmax(scripted_output, dim=1).topk(5).indices

print('Python model top 5 results:\n  {}'.format(unscripted_top5))
print('TorchScript model top 5 results:\n  {}'.format(scripted_top5))

In [None]:
from pathlib import Path

# Save the exported model under models/triton/resnet18/1/ -- the same model
# name ("resnet18") and version ("1") that the test cells query later.
model_file = Path("models/triton/resnet18/1/model.pt")

# The version folder may not exist yet (empty directories are not tracked by
# git), and save() does not create missing parent directories.
model_file.parent.mkdir(parents=True, exist_ok=True)

r18_scripted.save(str(model_file))

## Register model

In [None]:
from pathlib import Path
from azureml.core.model import Model

# Register the whole local "models" folder as a single model asset.
prefix = Path(".")
model_path = prefix.joinpath("models")

model = Model.register(
    # Model.register documents model_path as a str, so convert the Path explicitly.
    model_path=str(model_path),
    # NOTE(review): the registered name is left unchanged so existing
    # registrations keep versioning under the same name -- consider renaming.
    model_name="detectron2",
    # Tags and description describe the artifact that is actually registered:
    # the TorchVision resnet18 exported to TorchScript in the cells above.
    tags={"area": "image classification", "type": "torchvision resnet18"},
    description="TorchVision resnet18 pre-trained model converted to torchscript",
    workspace=ws,
    model_framework=Model.Framework.MULTI,
    model_framework_version='21.02-py3',
)

print(model)

## Deploy webservice

Deploy to a pre-created [AksCompute](https://docs.microsoft.com/python/api/azureml-core/azureml.core.compute.aks.akscompute?view=azure-ml-py#provisioning-configuration-agent-count-none--vm-size-none--ssl-cname-none--ssl-cert-pem-file-none--ssl-key-pem-file-none--location-none--vnet-resourcegroup-name-none--vnet-name-none--subnet-name-none--service-cidr-none--dns-service-ip-none--docker-bridge-cidr-none--cluster-purpose-none--load-balancer-type-none-) named `aks-gpu`. For other options, see [our documentation](https://docs.microsoft.com/azure/machine-learning/how-to-deploy-and-where?tabs=azcli).


In [None]:
from azureml.core.webservice import AksWebservice
from azureml.core.model import InferenceConfig
from random import randint

# Name under which the webservice is created in the workspace.
service_name = "detr-ncd-aks-gpu"

# Resource and authentication settings for the deployment on the
# pre-created `aks-gpu` compute target.
aks_settings = {
    "compute_target_name": "aks-gpu",
    "gpu_cores": 1,
    "cpu_cores": 1,
    "memory_gb": 8,
    "auth_enabled": True,
}
config = AksWebservice.deploy_configuration(**aks_settings)

# Deploy the registered model; overwrite=True is passed so an existing service
# with the same name does not block the deployment.
deploy_args = {
    "workspace": ws,
    "name": service_name,
    "models": [model],
    "deployment_config": config,
    "overwrite": True,
}
service = Model.deploy(**deploy_args)

# Block until the deployment finishes, streaming progress to the cell output.
service.wait_for_deployment(show_output=True)

In [None]:
# Fetch the webservice logs, then display them.
service_logs = service.get_logs()
print(service_logs)

## Test the webservice

In [None]:
from urllib.parse import urlparse

import tritonclient.http as tritonhttpclient

# Authentication key for the service; sent as a bearer token on every request.
service_key = service.get_keys()[0]
headers = {"Authorization": f"Bearer {service_key}"}

# The Triton HTTP client takes "host:port/base-path" without a scheme. Parse
# the URI instead of slicing off a fixed 7 characters, so that "https://"
# endpoints are handled as well, and enable TLS on the client for them.
parsed_uri = urlparse(service.scoring_uri)
scoring_uri = parsed_uri.netloc + parsed_uri.path

triton_client = tritonhttpclient.InferenceServerClient(
    scoring_uri,
    ssl=(parsed_uri.scheme == "https"),
)

model_name = "resnet18"

# Check the state of server.
health_ctx = triton_client.is_server_ready(headers=headers)
print("Is server ready - {}".format(health_ctx))

# Check the status of model.
status_ctx = triton_client.is_model_ready(model_name, model_version="1", headers=headers)
print("Is model ready - {}".format(status_ctx))

In [None]:
from tritonclient.utils import triton_to_np_dtype
import numpy as np
import torch
import torch.nn.functional as F

# Ask the server for the model's input/output names and datatypes so the
# request is built from metadata rather than hard-coded values.
model_metadata = triton_client.get_model_metadata(model_name=model_name, headers=headers)

input_meta = model_metadata["inputs"]
output_meta = model_metadata["outputs"]

np_dtype = triton_to_np_dtype(input_meta[0]["datatype"])

# Random batch of one 3x224x224 image, converted to the dtype the model reports.
dummy_input = torch.rand(1, 3, 224, 224)     # We should run a quick test
input_ids = dummy_input.numpy().astype(np_dtype)

# Populate the inputs array.
# Named `infer_input` (not `input`) so the builtin input() is not shadowed
# for the rest of the notebook session.
infer_input = tritonhttpclient.InferInput(
    input_meta[0]["name"], input_ids.shape, input_meta[0]["datatype"]
)
infer_input.set_data_from_numpy(input_ids, binary_data=False)
inputs = [infer_input]

# Populate the outputs array: request every output listed in the metadata.
outputs = [
    tritonhttpclient.InferRequestedOutput(out_meta["name"], binary_data=False)
    for out_meta in output_meta
]

# Run inference
res = triton_client.infer(
    model_name,
    inputs,
    request_id="0",
    outputs=outputs,
    model_version="1",
    headers=headers,
)

# Read back the first output by the name reported in the metadata instead of
# a hard-coded 'output__0', so the lookup always matches what was requested.
out_data = res.as_numpy(output_meta[0]["name"])
tensor_arr = torch.from_numpy(out_data)

hosted_top5 = F.softmax(tensor_arr, dim=1).topk(5).indices

print('TorchScript model top 5 results:\n  {}'.format(hosted_top5))

## Delete the webservice

In [None]:
# Delete the webservice deployed earlier in this notebook now that testing is done.
service.delete()

# Next steps

Try reading [our documentation](https://aka.ms/triton-aml-docs) to use Triton with your own models or check out the other notebooks in this folder for ways to do pre- and post-processing on the server. 