In [1]:
import sys, os

sys.path.append(os.environ["BUILD_WORKSPACE_DIRECTORY"])
from lib.ml.inference.tasks.object_detection_2d.yolov5.post_processing_model import (
    transform_and_post_process,
    unpack_observations,
)
from lib.ml.inference.tasks.object_detection_2d.yolov5.utils import (
    preprocess_image,
)
from core.utils.aws_utils import (
    download_to_file,
    separate_bucket_from_relative_path,
)

In [2]:
import tempfile
import torch


def get_yolo_model(
    use_s3: bool = False,
    model_path: str = "s3://voxel-users/common/yolo/automated/2021-12-06-00-00-00-0000-yolo/best.torchscript.pt",
    local_model_path: str = (
        "/home/tim/tmp/modesto_experiment/best_960.torchscript.pt"
    ),
    device: str = "cuda",
):
    """Load the TorchScript YOLO model and move it to ``device``.

    Args:
        use_s3: When True, download ``model_path`` from S3 into a temporary
            file and load from there; otherwise load ``local_model_path``.
        model_path: S3 URI of the TorchScript file (only used if ``use_s3``).
        local_model_path: Filesystem path of the TorchScript file (only used
            if ``use_s3`` is False).
        device: Device string passed to ``.to()`` on the loaded module.

    Returns:
        The module returned by ``torch.jit.load``, moved to ``device``.
    """
    if not use_s3:
        # Local path: no temporary file or S3 path parsing is needed.
        return torch.jit.load(local_model_path).to(device)

    bucket, path = separate_bucket_from_relative_path(model_path)
    with tempfile.NamedTemporaryFile() as downloaded:
        download_to_file(bucket, path, downloaded.name)
        # Load while the temporary file still exists; it is removed when the
        # ``with`` block exits.
        yolo_model = torch.jit.load(downloaded.name)
    return yolo_model.to(device)

In [3]:
import typing


class YoloWithPostProcessing(torch.nn.Module):
    """Chain a detector with ``transform_and_post_process`` in one module.

    The wrapped ``model`` is called on the input tensor, element ``[0]`` of
    its result is taken as the prediction, and that prediction is passed to
    ``transform_and_post_process`` together with the remaining arguments.
    """

    def __init__(self, model):
        """Store ``model`` as a submodule attribute named ``model``."""
        super().__init__()
        self.model = model

    def forward(
        self,
        input_tensor: torch.Tensor,
        offset: torch.Tensor,
        scale: torch.Tensor,
        classes: torch.Tensor,
        confidence_threshold: torch.Tensor,
        nms_threshold: torch.Tensor,
    ):
        """Run the wrapped model, then post-process its first output.

        All arguments after ``input_tensor`` are forwarded unchanged, in the
        same order, to ``transform_and_post_process``.
        """
        return transform_and_post_process(
            self.model(input_tensor)[0],
            offset,
            scale,
            classes,
            confidence_threshold,
            nms_threshold,
        )


# Load the TorchScript YOLO model with get_yolo_model()'s default arguments.
yolo_model = get_yolo_model()

# Wrap the detector so inference and post-processing share one forward().
composed_model = YoloWithPostProcessing(yolo_model)

In [4]:
import cv2


def get_example_image(
    img_url: str = (
        "https://i.kym-cdn.com/photos/images/facebook/000/377/946/0b9.jpg"
    ),
    timeout: float = 30.0,
):
    """Download an image and decode it with OpenCV.

    Args:
        img_url: URL of the image to fetch. Defaults to the rickroll image
            used throughout this notebook.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The array returned by ``cv2.imread`` for the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If OpenCV cannot decode the downloaded bytes.
    """
    # https://stackoverflow.com/questions/30229231/python-save-image-from-url
    import requests

    # Without a timeout a stalled connection would block the kernel forever.
    response = requests.get(img_url, timeout=timeout)
    # Fail here rather than writing an HTTP error page to disk as a ".jpg".
    response.raise_for_status()

    with tempfile.NamedTemporaryFile(suffix=".jpg") as tmp:
        # Write through the existing handle and flush so that cv2.imread,
        # which opens the file by name, sees the complete payload.
        tmp.write(response.content)
        tmp.flush()
        image = cv2.imread(tmp.name)

    # cv2.imread signals a decode failure by returning None, not by raising.
    if image is None:
        raise ValueError(
            f"OpenCV could not decode the image downloaded from {img_url}"
        )
    return image

In [5]:
# Fetch the sample image and keep the result of get_example_image() around.
example_image = get_example_image()

In [6]:
import numpy as np

# Network input size as height, width (per the original note: from configs).
sample_input_shape = [480, 960]

# Images are height, width, channels in OpenCV; unsqueeze(0) adds a leading
# axis in front of them.
# Reuse the image fetched in the previous cell instead of downloading it a
# second time.
batched_input = torch.from_numpy(example_image).unsqueeze(0)
processed, offset, scale = preprocess_image(
    batched_input, sample_input_shape, "cuda"
)

In [7]:
# Compile the composed module with torch.jit.script. The example tuple follows
# the parameter order of YoloWithPostProcessing.forward():
# input_tensor, offset, scale, classes, confidence_threshold, nms_threshold.
# NOTE(review): the three literal tensors are created without a device
# argument, while preprocess_image() was called with "cuda" - confirm that
# mixing devices in the example inputs is intended.
scripted_model = torch.jit.script(
    composed_model,
    example_inputs=[
        (
            processed,
            offset,
            scale,
            torch.tensor([0, 1]),
            torch.tensor([0.7]),
            torch.tensor([0.2]),
        )
    ],
)



In [8]:
# Display the scripted module's repr to inspect its nested submodules.
scripted_model

RecursiveScriptModule(
  original_name=YoloWithPostProcessing
  (model): RecursiveScriptModule(
    original_name=Model
    (model): RecursiveScriptModule(
      original_name=Sequential
      (0): RecursiveScriptModule(
        original_name=Conv
        (conv): RecursiveScriptModule(original_name=Conv2d)
        (act): RecursiveScriptModule(original_name=SiLU)
      )
      (1): RecursiveScriptModule(
        original_name=Conv
        (conv): RecursiveScriptModule(original_name=Conv2d)
        (act): RecursiveScriptModule(original_name=SiLU)
      )
      (2): RecursiveScriptModule(
        original_name=C3
        (cv1): RecursiveScriptModule(
          original_name=Conv
          (conv): RecursiveScriptModule(original_name=Conv2d)
          (act): RecursiveScriptModule(original_name=SiLU)
        )
        (cv2): RecursiveScriptModule(
          original_name=Conv
          (conv): RecursiveScriptModule(original_name=Conv2d)
          (act): RecursiveScriptModule(original_name=Si

In [9]:
from loguru import logger

# Attempt an ONNX export of the scripted model to "foo.onnx". A RuntimeError
# is reported rather than re-raised, so the notebook keeps running.
# NOTE(review): `args` passes a plain list and floats for classes/thresholds,
# wrapped in an extra level of tuple nesting, whereas forward() is annotated
# with tensors - confirm whether that is what triggers the failure.
try:
    torch.onnx.export(
        scripted_model,
        args=((processed, offset, scale, [0, 1], 0.7, 0.2),),
        f="foo.onnx",
    )
except RuntimeError as e:
    # One-line summary on stdout, then the full traceback through loguru.
    print(f"Model was not able to be traced with error {e}")
    logger.exception("error was found")

2023-04-11 17:54:15.542 | ERROR    | __main__:<module>:11 - error was found
Traceback (most recent call last):

  File "/home/tim/.cache/bazel/_bazel_tim/2d5638d889f325f1ad1d1c69ba2aada2/external/python_x86_64-unknown-linux-gnu/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
           │         │     └ {'__name__': '__main__', '__doc__': 'Entry point for launching an IPython kernel.\n\nThis is separate from the ipykernel pack...
           │         └ <code object <module> at 0x7f64785b2ea0, file "/home/tim/.cache/bazel/_bazel_tim/2d5638d889f325f1ad1d1c69ba2aada2/execroot/vo...
           └ <function _run_code at 0x7f6478577550>
  File "/home/tim/.cache/bazel/_bazel_tim/2d5638d889f325f1ad1d1c69ba2aada2/external/python_x86_64-unknown-linux-gnu/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
         │     └ {'__name__': '__main__', '__doc__': 'Entry point for launching an IPython kernel.\n\nThis is sepa

Model was not able to be traced with error input_values.size() == param_count_list.size() INTERNAL ASSERT FAILED at "../torch/csrc/jit/python/script_init.cpp":491, please report a bug to PyTorch.  input_values:170 vs param_count_list:165


In [10]:
# Move the scripted detector + post-processing module to the GPU.
cuda_scripted = scripted_model.to("cuda")

In [11]:
# A single example-input tuple, in forward() argument order, with every entry
# moved to the GPU.
# NOTE(review): `offset` is wrapped in a list before conversion but `scale` is
# not, so the two tensors get a different number of dimensions - confirm.
cuda_scripted_inputs = [
    tuple(
        map(
            lambda cpu_value: cpu_value.to("cuda"),
            [
                processed,
                torch.tensor([offset]),
                torch.tensor(scale),
                torch.tensor([0, 1]),
                torch.tensor([0.7]),
                torch.tensor([0.2]),
            ],
        )
    )
]

In [12]:
# Script once more, this time passing the GPU example inputs.
# NOTE(review): `cuda_scripted` is already the result of torch.jit.script -
# confirm that scripting it a second time is needed.
cuda_scripted_model = torch.jit.script(
    cuda_scripted, example_inputs=cuda_scripted_inputs
)

In [13]:
# Persist the scripted detector + post-processing module to disk.
# NOTE(review): absolute, user-specific path; adjust before running elsewhere.
model_save_path = "/home/tim/tmp/yolo_torchscript_postprocess.pt"
cuda_scripted_model.save(model_save_path)

In [14]:
import torch
from google.protobuf.json_format import ParseDict
from tritonclient.grpc import model_config_pb2

from core.utils.yaml_jinja import load_yaml_with_jinja
from lib.ml.inference.backends.triton import TritonInferenceBackend

# from lib.infra.utils.resolve_model_path import resolve_model_path

# model protos are defined here:
# https://github.com/triton-inference-server/common/blob/main/protobuf/model_config.proto


def test_postprocessing():
    """Build a ``TritonInferenceBackend`` for the saved post-processing model.

    Despite the ``test_`` prefix nothing is asserted here: a Triton model
    config is assembled as a plain dict, parsed into a ``ModelConfig`` proto
    and handed to the backend together with the on-disk model path.

    Returns:
        The constructed ``TritonInferenceBackend``.
    """

    def tensor_spec(name, data_type, dims):
        """Return one input/output entry of the Triton config dict."""
        return {"name": name, "data_type": data_type, "dims": dims}

    # (name, Triton data type, dims); six inputs, of which only input3 is an
    # integer tensor. Presumably the order mirrors PostProcessing.forward() -
    # TODO confirm.
    input_layout = (
        ("input0", "TYPE_FP16", [28350, 7]),
        ("input1", "TYPE_FP16", [2]),
        ("input2", "TYPE_FP16", [2]),
        ("input3", "TYPE_INT32", [2]),
        ("input4", "TYPE_FP16", [1]),
        ("input5", "TYPE_FP16", [1]),
    )
    output_layout = (
        ("output0", "TYPE_INT64", [1]),
        ("output1", "TYPE_FP32", [9]),
    )

    config_dict = {
        "name": "yolov5_postprocessing",
        "platform": "pytorch_libtorch",
        "max_batch_size": 16,
        "input": [tensor_spec(*row) for row in input_layout],
        "output": [tensor_spec(*row) for row in output_layout],
    }

    # ParseDict fills the proto message in place.
    triton_config = model_config_pb2.ModelConfig()
    ParseDict(config_dict, triton_config)

    # The backend receives the saved model's file path as its model name.
    model_path = "/home/tim/tmp/yolo_torchscript_postprocess2.pt"
    return TritonInferenceBackend(
        model_name=model_path,
        triton_config=triton_config,
    )



In [15]:
from lib.ml.inference.backends.triton import (
    InferInput,
    InferRequestedOutput,
)
import numpy as np


def generate_inputs(input_tensors):
    """Send ``input_tensors`` to the Triton post-processing model.

    A backend is created through ``test_postprocessing()``, six ``InferInput``
    objects with fixed shapes are declared, each tensor in ``input_tensors``
    is converted to numpy and attached to the input at the same position, and
    one inference call requesting ``output0`` and ``output1`` is made.

    Args:
        input_tensors: Iterable of tensors supporting ``.numpy()``, ordered
            like the declared inputs (``input0`` .. ``input5``).

    Returns:
        The object returned by ``backend.triton_client.infer``.
    """
    backend = test_postprocessing()

    # Position 3 is the only integer input; every other one is half precision.
    int32_position = 3
    declared_shapes = (
        [1, 28350, 7],
        [1, 2],
        [1, 2],
        [1, 2],
        [1, 1],
        [1, 1],
    )

    inputs = []
    for position, shape in enumerate(declared_shapes):
        triton_dtype = "INT32" if position == int32_position else "FP16"
        inputs.append(InferInput(f"input{position}", shape, triton_dtype))

    for position, tensor in enumerate(input_tensors):
        numpy_dtype = np.int32 if position == int32_position else np.float16
        inputs[position].set_data_from_numpy(
            tensor.numpy().astype(numpy_dtype)
        )

    outputs = [
        InferRequestedOutput(output_name)
        for output_name in ("output0", "output1")
    ]

    # NOTE(review): an earlier comment here said the raw output has to be
    # copied because the numpy reference is not writable. No copy is made;
    # the result object is returned as-is.
    return backend.triton_client.infer(
        model_name="home_tim_tmp_yolo_torchscript_postprocess2.pt",
        inputs=inputs,
        outputs=outputs,
    )

In [16]:
# Run the bare detector on the first entry of the example tuple (the
# preprocessed image); a later cell reads element [0] of this result.
a = yolo_model(cuda_scripted_inputs[0][0])
# CPU copies of every example input.
inputs_ = list(i.cpu() for i in cuda_scripted_inputs[0])

In [17]:
from lib.ml.inference.tasks.object_detection_2d.yolov5.post_processing_model import (
    transform_and_post_process,
    unpack_observations,
)


class PostProcessing(torch.nn.Module):
    """Module wrapper around ``transform_and_post_process``.

    It holds no state of its own; it only exposes the function through a
    ``forward`` method.
    """

    def forward(
        self,
        prediction: torch.Tensor,
        offset: torch.Tensor,
        scale: torch.Tensor,
        classes: torch.Tensor,
        confidence_threshold: torch.Tensor,
        nms_threshold: torch.Tensor,
    ):
        """Forward every argument, in order, to the post-processing function."""
        post_processed = transform_and_post_process(
            prediction,
            offset,
            scale,
            classes,
            confidence_threshold,
            nms_threshold,
        )
        return post_processed

In [18]:
# Instantiate the post-processing-only module (no detector inside).
p_model = PostProcessing()

In [19]:
# Move the post-processing module to the GPU.
cuda_p_model = p_model.to("cuda")

In [20]:
# NOTE(review): as written this loop appears to have no effect.
# `cuda_scripted_inputs` is a one-element list (a single tuple of tensors),
# so the `[1:]` slice is empty and the body never runs. If it did run,
# `unsqueeze()` is called without the `dim` argument it normally requires,
# and rebinding `c` would not change the tensors stored in the list either.
for c in cuda_scripted_inputs[1:]:
    c = c.unsqueeze()

In [21]:
# cuda_scripted_inputs[0] = (csi.unsqueeze(dim=0) for i, csi in enumerate(cuda_scripted_inputs[0]) if i> 0)

In [22]:
# cuda_scripted_inputs[0] = tuple(cuda_scripted_inputs[0])

In [23]:
# Rebuild the example inputs for the post-processing-only module: the first
# entry is now element [0] of the raw detector output `a`, and every other
# entry is wrapped in one extra list level before conversion, which gives it
# a leading dimension of size 1.
cuda_scripted_inputs = [
    tuple(
        map(
            lambda cpu_value: cpu_value.to("cuda"),
            [
                a[0],
                torch.tensor([offset]),
                torch.tensor([scale]),
                torch.tensor([[0, 1]]),
                torch.tensor([[0.7]]),
                torch.tensor([[0.2]]),
            ],
        )
    )
]

In [24]:
# Inspect the rebuilt example inputs.
cuda_scripted_inputs

[(tensor([[[4.8254e+00, 6.7972e+00, 8.8472e+00,  ..., 8.2012e-09,
            6.2281e-01, 3.4054e-01],
           [1.5298e+01, 7.2738e+00, 2.1923e+01,  ..., 2.3744e-09,
            4.7648e-01, 5.1285e-01],
           [2.1765e+01, 7.6465e+00, 2.8101e+01,  ..., 1.0994e-08,
            7.0488e-01, 3.0642e-01],
           ...,
           [8.7527e+02, 4.5789e+02, 1.4945e+02,  ..., 3.0573e-08,
            6.4081e-01, 1.3216e-01],
           [9.1796e+02, 4.6129e+02, 1.5803e+02,  ..., 9.0671e-08,
            5.8365e-01, 1.6683e-01],
           [9.4555e+02, 4.6236e+02, 1.8089e+02,  ..., 8.3444e-08,
            6.0037e-01, 1.5930e-01]]], device='cuda:0'),
  tensor([[158.,   0.]], device='cuda:0'),
  tensor([[0.6486, 0.6486]], device='cuda:0'),
  tensor([[0, 1]], device='cuda:0'),
  tensor([[0.7000]], device='cuda:0'),
  tensor([[0.2000]], device='cuda:0'))]

In [25]:
# Print the size of every example input, one per line, to check their shapes.
for c in cuda_scripted_inputs[0]:
    print(c.size())

torch.Size([1, 28350, 7])
torch.Size([1, 2])
torch.Size([1, 2])
torch.Size([1, 2])
torch.Size([1, 1])
torch.Size([1, 1])


In [26]:
# Script the post-processing-only module with the rebuilt example inputs.
c_scripted_model = torch.jit.script(
    cuda_p_model, example_inputs=cuda_scripted_inputs
)

In [27]:
# Save the scripted post-processing module. This is the same path that
# test_postprocessing() hands to the Triton backend.
# NOTE(review): absolute, user-specific path; adjust before running elsewhere.
model_save_path = "/home/tim/tmp/yolo_torchscript_postprocess2.pt"
c_scripted_model.save(model_save_path)

In [28]:
# Make CPU copies of the example inputs before passing them to
# generate_inputs(), which calls .numpy() on each tensor.
cpu_inputs = [gpu_tensor.cpu() for gpu_tensor in cuda_scripted_inputs[0]]

In [30]:
# Send the CPU inputs through generate_inputs(), which performs the Triton
# inference call and returns its result.
# NOTE(review): this rebinds `a`, which an earlier cell used for the raw
# detector output.
a = generate_inputs(cpu_inputs)

In [31]:
# Display the object returned by the Triton inference call.
a

<tritonclient.grpc.InferResult at 0x7f63af95f820>