From 9a642086f9991eaff526357e7843247a8331c009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20B=C3=A9dard-Couture?= Date: Tue, 30 Apr 2024 00:17:38 -0400 Subject: [PATCH 1/8] Add support for TensorRT v10 (multiple api calls have changed) --- frigate/detectors/plugins/tensorrt.py | 166 ++++++++++++++++++-------- 1 file changed, 114 insertions(+), 52 deletions(-) diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py index 2a57ec2d3c..0f41d7a8ab 100644 --- a/frigate/detectors/plugins/tensorrt.py +++ b/frigate/detectors/plugins/tensorrt.py @@ -6,6 +6,7 @@ try: import tensorrt as trt from cuda import cuda + TRT_VERSION=int(trt.__version__[0:trt.__version__.find(".")]) TRT_SUPPORT = True except ModuleNotFoundError: @@ -91,22 +92,40 @@ def _load_engine(self, model_path): def _get_input_shape(self): """Get input shape of the TensorRT YOLO engine.""" binding = self.engine[0] - assert self.engine.binding_is_input(binding) - binding_dims = self.engine.get_binding_shape(binding) - if len(binding_dims) == 4: - return ( - tuple(binding_dims[2:]), - trt.nptype(self.engine.get_binding_dtype(binding)), - ) - elif len(binding_dims) == 3: - return ( - tuple(binding_dims[1:]), - trt.nptype(self.engine.get_binding_dtype(binding)), - ) + if TRT_VERSION < 10: + assert self.engine.binding_is_input(binding) + binding_dims = self.engine.get_binding_shape(binding) + if len(binding_dims) == 4: + return ( + tuple(binding_dims[2:]), + trt.nptype(self.engine.get_binding_dtype(binding)), + ) + elif len(binding_dims) == 3: + return ( + tuple(binding_dims[1:]), + trt.nptype(self.engine.get_binding_dtype(binding)), + ) + else: + raise ValueError( + "bad dims of binding %s: %s" % (binding, str(binding_dims)) + ) else: - raise ValueError( - "bad dims of binding %s: %s" % (binding, str(binding_dims)) - ) + assert binding == "input" + binding_dims = self.engine.get_tensor_shape("input") + if len(binding_dims) == 4: + return ( + tuple(binding_dims[2:]), + trt.nptype(self.engine.get_tensor_dtype(binding)), + ) + elif len(binding_dims) == 3: + return ( + tuple(binding_dims[1:]), + trt.nptype(self.engine.get_tensor_dtype(binding)), + ) + else: + raise ValueError( + "bad dims of binding %s: %s" % (binding, str(binding_dims)) + ) def _allocate_buffers(self): """Allocates all host/device in/out buffers required for an engine.""" @@ -115,41 +134,78 @@ def _allocate_buffers(self): bindings = [] output_idx = 0 for binding in self.engine: - binding_dims = self.engine.get_binding_shape(binding) - if len(binding_dims) == 4: - # explicit batch case (TensorRT 7+) - size = trt.volume(binding_dims) - elif len(binding_dims) == 3: - # implicit batch case (TensorRT 6 or older) - size = trt.volume(binding_dims) * self.engine.max_batch_size - else: - raise ValueError( - "bad dims of binding %s: %s" % (binding, str(binding_dims)) + if TRT_VERSION < 10: + binding_dims = self.engine.get_binding_shape(binding) + if len(binding_dims) == 4: + # explicit batch case (TensorRT 7+) + size = trt.volume(binding_dims) + elif len(binding_dims) == 3: + # implicit batch case (TensorRT 6 or older) + size = trt.volume(binding_dims) * self.engine.max_batch_size + else: + raise ValueError( + "bad dims of binding %s: %s" % (binding, str(binding_dims)) + ) + nbytes = size * self.engine.get_binding_dtype(binding).itemsize + # Allocate host and device buffers + err, host_mem = cuda.cuMemHostAlloc( + nbytes, Flags=cuda.CU_MEMHOSTALLOC_DEVICEMAP ) - nbytes = size * self.engine.get_binding_dtype(binding).itemsize - # Allocate host and 
device buffers - err, host_mem = cuda.cuMemHostAlloc( - nbytes, Flags=cuda.CU_MEMHOSTALLOC_DEVICEMAP - ) - assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAllocHost returned {err}" - logger.debug( - f"Allocated Tensor Binding {binding} Memory {nbytes} Bytes ({size} * {self.engine.get_binding_dtype(binding)})" - ) - err, device_mem = cuda.cuMemAlloc(nbytes) - assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAlloc returned {err}" - # Append the device buffer to device bindings. - bindings.append(int(device_mem)) - # Append to the appropriate list. - if self.engine.binding_is_input(binding): - logger.debug(f"Input has Shape {binding_dims}") - inputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) + assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAllocHost returned {err}" + logger.debug( + f"Allocated Tensor Binding {binding} Memory {nbytes} Bytes ({size} * {self.engine.get_binding_dtype(binding)})" + ) + err, device_mem = cuda.cuMemAlloc(nbytes) + assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAlloc returned {err}" + # Append the device buffer to device bindings. + bindings.append(int(device_mem)) + # Append to the appropriate list. + if self.engine.binding_is_input(binding): + logger.debug(f"Input has Shape {binding_dims}") + inputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) + else: + # each grid has 3 anchors, each anchor generates a detection + # output of 7 float32 values + assert size % 7 == 0, f"output size was {size}" + logger.debug(f"Output has Shape {binding_dims}") + outputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) + output_idx += 1 else: - # each grid has 3 anchors, each anchor generates a detection - # output of 7 float32 values - assert size % 7 == 0, f"output size was {size}" - logger.debug(f"Output has Shape {binding_dims}") - outputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) - output_idx += 1 + binding_dims = self.engine.get_tensor_shape(binding) + if len(binding_dims) == 4: + # explicit batch case (TensorRT 7+) + size = trt.volume(binding_dims) + elif len(binding_dims) == 3: + # implicit batch case (TensorRT 6 or older) + size = trt.volume(binding_dims) * self.engine.max_batch_size + else: + raise ValueError( + "bad dims of binding %s: %s" % (binding, str(binding_dims)) + ) + nbytes = size * self.engine.get_tensor_dtype(binding).itemsize + # Allocate host and device buffers + err, host_mem = cuda.cuMemHostAlloc( + nbytes, Flags=cuda.CU_MEMHOSTALLOC_DEVICEMAP + ) + assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAllocHost returned {err}" + logger.debug( + f"Allocated Tensor Binding {binding} Memory {nbytes} Bytes ({size} * {self.engine.get_tensor_dtype(binding)})" + ) + err, device_mem = cuda.cuMemAlloc(nbytes) + assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAlloc returned {err}" + # Append the device buffer to device bindings. + bindings.append(int(device_mem)) + # Append to the appropriate list. 
+ if binding == "input": + logger.debug(f"Input has Shape {binding_dims}") + inputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) + else: + # each grid has 3 anchors, each anchor generates a detection + # output of 7 float32 values + assert size % 7 == 0, f"output size was {size}" + logger.debug(f"Output has Shape {binding_dims}") + outputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) + output_idx += 1 assert len(inputs) == 1, f"inputs len was {len(inputs)}" assert len(outputs) == 1, f"output len was {len(outputs)}" return inputs, outputs, bindings @@ -170,10 +226,16 @@ def _do_inference(self): ] # Run inference. - if not self.context.execute_async_v2( - bindings=self.bindings, stream_handle=self.stream - ): - logger.warn("Execute returned false") + if TRT_VERSION < 10: + if not self.context.execute_async_v2( + bindings=self.bindings, stream_handle=self.stream + ): + logger.warn("Execute returned false") + else: + if not self.context.execute_v2( + self.bindings + ): + logger.warn("Execute returned false") # Transfer predictions back from the GPU. [ From 408f2954168ee836988567286e92dc6c88c5bfbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20B=C3=A9dard-Couture?= Date: Sun, 5 May 2024 10:01:35 -0400 Subject: [PATCH 2/8] Remove unnecessary size check in TensorRT v10 block --- frigate/detectors/plugins/tensorrt.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py index 0f41d7a8ab..d9006fe864 100644 --- a/frigate/detectors/plugins/tensorrt.py +++ b/frigate/detectors/plugins/tensorrt.py @@ -175,9 +175,6 @@ def _allocate_buffers(self): if len(binding_dims) == 4: # explicit batch case (TensorRT 7+) size = trt.volume(binding_dims) - elif len(binding_dims) == 3: - # implicit batch case (TensorRT 6 or older) - size = trt.volume(binding_dims) * self.engine.max_batch_size else: raise ValueError( "bad dims of binding %s: %s" % (binding, str(binding_dims)) From 6c0abe4833a3604369c121df899075d46fae3103 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20B=C3=A9dard-Couture?= Date: Tue, 7 May 2024 21:15:57 -0400 Subject: [PATCH 3/8] Refactor to reduce code duplication --- frigate/detectors/plugins/tensorrt.py | 185 +++++++++++--------------- 1 file changed, 75 insertions(+), 110 deletions(-) diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py index d9006fe864..b3bcd360c1 100644 --- a/frigate/detectors/plugins/tensorrt.py +++ b/frigate/detectors/plugins/tensorrt.py @@ -89,43 +89,50 @@ def _load_engine(self, model_path): with open(model_path, "rb") as f, trt.Runtime(self.trt_logger) as runtime: return runtime.deserialize_cuda_engine(f.read()) - def _get_input_shape(self): - """Get input shape of the TensorRT YOLO engine.""" - binding = self.engine[0] + def _binding_is_input(self, binding): if TRT_VERSION < 10: assert self.engine.binding_is_input(binding) - binding_dims = self.engine.get_binding_shape(binding) - if len(binding_dims) == 4: - return ( - tuple(binding_dims[2:]), - trt.nptype(self.engine.get_binding_dtype(binding)), - ) - elif len(binding_dims) == 3: - return ( - tuple(binding_dims[1:]), - trt.nptype(self.engine.get_binding_dtype(binding)), - ) - else: - raise ValueError( - "bad dims of binding %s: %s" % (binding, str(binding_dims)) - ) else: assert binding == "input" - binding_dims = self.engine.get_tensor_shape("input") - if len(binding_dims) == 4: - return ( - tuple(binding_dims[2:]), - trt.nptype(self.engine.get_tensor_dtype(binding)), - ) - 
elif len(binding_dims) == 3: - return ( - tuple(binding_dims[1:]), - trt.nptype(self.engine.get_tensor_dtype(binding)), - ) - else: - raise ValueError( - "bad dims of binding %s: %s" % (binding, str(binding_dims)) - ) + return True + + def _get_binding_dims(self, binding): + if TRT_VERSION < 10: + return self.engine.get_binding_shape(binding) + else: + return self.engine.get_tensor_shape(binding) + + def _get_binding_dtype(self, binding): + if TRT_VERSION < 10: + return self.engine.get_binding_dtype(binding) + else: + return self.engine.get_tensor_shape(binding) + + def _execute(self): + if TRT_VERSION < 10: + return self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream) + else: + return self.context.execute_v2(self.bindings) + + def _get_input_shape(self): + """Get input shape of the TensorRT YOLO engine.""" + binding = self.engine[0] + assert self._binding_is_input(binding) + binding_dims = self._get_binding_dims(binding) + if len(binding_dims) == 4: + return ( + tuple(binding_dims[2:]), + trt.nptype(self._get_binding_dtype(binding)), + ) + elif len(binding_dims) == 3: + return ( + tuple(binding_dims[1:]), + trt.nptype(self._get_binding_dtype(binding)), + ) + else: + raise ValueError( + "bad dims of binding %s: %s" % (binding, str(binding_dims)) + ) def _allocate_buffers(self): """Allocates all host/device in/out buffers required for an engine.""" @@ -134,75 +141,41 @@ def _allocate_buffers(self): bindings = [] output_idx = 0 for binding in self.engine: - if TRT_VERSION < 10: - binding_dims = self.engine.get_binding_shape(binding) - if len(binding_dims) == 4: - # explicit batch case (TensorRT 7+) - size = trt.volume(binding_dims) - elif len(binding_dims) == 3: - # implicit batch case (TensorRT 6 or older) - size = trt.volume(binding_dims) * self.engine.max_batch_size - else: - raise ValueError( - "bad dims of binding %s: %s" % (binding, str(binding_dims)) - ) - nbytes = size * self.engine.get_binding_dtype(binding).itemsize - # Allocate host and device buffers - err, host_mem = cuda.cuMemHostAlloc( - nbytes, Flags=cuda.CU_MEMHOSTALLOC_DEVICEMAP - ) - assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAllocHost returned {err}" - logger.debug( - f"Allocated Tensor Binding {binding} Memory {nbytes} Bytes ({size} * {self.engine.get_binding_dtype(binding)})" - ) - err, device_mem = cuda.cuMemAlloc(nbytes) - assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAlloc returned {err}" - # Append the device buffer to device bindings. - bindings.append(int(device_mem)) - # Append to the appropriate list. 
- if self.engine.binding_is_input(binding): - logger.debug(f"Input has Shape {binding_dims}") - inputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) - else: - # each grid has 3 anchors, each anchor generates a detection - # output of 7 float32 values - assert size % 7 == 0, f"output size was {size}" - logger.debug(f"Output has Shape {binding_dims}") - outputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) - output_idx += 1 + binding_dims = self._get_binding_dims(binding) + if len(binding_dims) == 4: + # explicit batch case (TensorRT 7+) + size = trt.volume(binding_dims) + elif len(binding_dims) == 3: + # implicit batch case (TensorRT 6 or older) + size = trt.volume(binding_dims) * self.engine.max_batch_size else: - binding_dims = self.engine.get_tensor_shape(binding) - if len(binding_dims) == 4: - # explicit batch case (TensorRT 7+) - size = trt.volume(binding_dims) - else: - raise ValueError( - "bad dims of binding %s: %s" % (binding, str(binding_dims)) - ) - nbytes = size * self.engine.get_tensor_dtype(binding).itemsize - # Allocate host and device buffers - err, host_mem = cuda.cuMemHostAlloc( - nbytes, Flags=cuda.CU_MEMHOSTALLOC_DEVICEMAP - ) - assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAllocHost returned {err}" - logger.debug( - f"Allocated Tensor Binding {binding} Memory {nbytes} Bytes ({size} * {self.engine.get_tensor_dtype(binding)})" + raise ValueError( + "bad dims of binding %s: %s" % (binding, str(binding_dims)) ) - err, device_mem = cuda.cuMemAlloc(nbytes) - assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAlloc returned {err}" - # Append the device buffer to device bindings. - bindings.append(int(device_mem)) - # Append to the appropriate list. - if binding == "input": - logger.debug(f"Input has Shape {binding_dims}") - inputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) - else: - # each grid has 3 anchors, each anchor generates a detection - # output of 7 float32 values - assert size % 7 == 0, f"output size was {size}" - logger.debug(f"Output has Shape {binding_dims}") - outputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) - output_idx += 1 + nbytes = size * self._get_binding_dtype(binding).itemsize + # Allocate host and device buffers + err, host_mem = cuda.cuMemHostAlloc( + nbytes, Flags=cuda.CU_MEMHOSTALLOC_DEVICEMAP + ) + assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAllocHost returned {err}" + logger.debug( + f"Allocated Tensor Binding {binding} Memory {nbytes} Bytes ({size} * {self._get_binding_dtype(binding)})" + ) + err, device_mem = cuda.cuMemAlloc(nbytes) + assert err is cuda.CUresult.CUDA_SUCCESS, f"cuMemAlloc returned {err}" + # Append the device buffer to device bindings. + bindings.append(int(device_mem)) + # Append to the appropriate list. + if self._binding_is_input(binding): + logger.debug(f"Input has Shape {binding_dims}") + inputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) + else: + # each grid has 3 anchors, each anchor generates a detection + # output of 7 float32 values + assert size % 7 == 0, f"output size was {size}" + logger.debug(f"Output has Shape {binding_dims}") + outputs.append(HostDeviceMem(host_mem, device_mem, nbytes, size)) + output_idx += 1 assert len(inputs) == 1, f"inputs len was {len(inputs)}" assert len(outputs) == 1, f"output len was {len(outputs)}" return inputs, outputs, bindings @@ -223,16 +196,8 @@ def _do_inference(self): ] # Run inference. 
- if TRT_VERSION < 10: - if not self.context.execute_async_v2( - bindings=self.bindings, stream_handle=self.stream - ): - logger.warn("Execute returned false") - else: - if not self.context.execute_v2( - self.bindings - ): - logger.warn("Execute returned false") + if not self._execute(): + logger.warn("Execute returned false") # Transfer predictions back from the GPU. [ From 485f3075747a9754223280531749f3fbfc69401d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20B=C3=A9dard-Couture?= Date: Tue, 7 May 2024 22:38:07 -0400 Subject: [PATCH 4/8] Fix wrong function name in new _get_binding_dtype function and only return input check (not assertion) in new _binding_is_input function --- frigate/detectors/plugins/tensorrt.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py index b3bcd360c1..d43b9c9022 100644 --- a/frigate/detectors/plugins/tensorrt.py +++ b/frigate/detectors/plugins/tensorrt.py @@ -91,10 +91,9 @@ def _load_engine(self, model_path): def _binding_is_input(self, binding): if TRT_VERSION < 10: - assert self.engine.binding_is_input(binding) + return self.engine.binding_is_input(binding) else: - assert binding == "input" - return True + return binding == "input" def _get_binding_dims(self, binding): if TRT_VERSION < 10: @@ -106,7 +105,7 @@ def _get_binding_dtype(self, binding): if TRT_VERSION < 10: return self.engine.get_binding_dtype(binding) else: - return self.engine.get_tensor_shape(binding) + return self.engine.get_tensor_dtype(binding) def _execute(self): if TRT_VERSION < 10: From 1879951bc098659413b925439f2be72eff7385bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20B=C3=A9dard-Couture?= Date: Sat, 18 May 2024 14:42:30 -0400 Subject: [PATCH 5/8] Add space around TRT_VERSION variable assignment (=) to respect linting --- frigate/detectors/plugins/tensorrt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py index d43b9c9022..6febd1e333 100644 --- a/frigate/detectors/plugins/tensorrt.py +++ b/frigate/detectors/plugins/tensorrt.py @@ -6,7 +6,7 @@ try: import tensorrt as trt from cuda import cuda - TRT_VERSION=int(trt.__version__[0:trt.__version__.find(".")]) + TRT_VERSION = int(trt.__version__[0:trt.__version__.find(".")]) TRT_SUPPORT = True except ModuleNotFoundError: From be84fe8a5e71095a8e28e18b54d81e356a29469a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20B=C3=A9dard-Couture?= Date: Sat, 18 May 2024 15:41:39 -0400 Subject: [PATCH 6/8] More linting fix --- frigate/detectors/plugins/tensorrt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py index 6febd1e333..6502122929 100644 --- a/frigate/detectors/plugins/tensorrt.py +++ b/frigate/detectors/plugins/tensorrt.py @@ -6,7 +6,7 @@ try: import tensorrt as trt from cuda import cuda - TRT_VERSION = int(trt.__version__[0:trt.__version__.find(".")]) + TRT_VERSION = int(trt.__version__[0 : trt.__version__.find(".")]) TRT_SUPPORT = True except ModuleNotFoundError: From 1bd26a082cbfbce61e43235a9474a73571c3190f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20B=C3=A9dard-Couture?= Date: Sat, 18 May 2024 15:57:59 -0400 Subject: [PATCH 7/8] Update frigate/detectors/plugins/tensorrt.py Co-authored-by: Nicolas Mowen --- frigate/detectors/plugins/tensorrt.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py index 6502122929..f1c3d25119 100644 --- a/frigate/detectors/plugins/tensorrt.py +++ b/frigate/detectors/plugins/tensorrt.py @@ -6,6 +6,7 @@ try: import tensorrt as trt from cuda import cuda + TRT_VERSION = int(trt.__version__[0 : trt.__version__.find(".")]) TRT_SUPPORT = True From 4695f8284a91a226c5be88ee44891e4f4ef4ba21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20B=C3=A9dard-Couture?= Date: Sat, 18 May 2024 16:05:06 -0400 Subject: [PATCH 8/8] More linting --- frigate/detectors/plugins/tensorrt.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/frigate/detectors/plugins/tensorrt.py b/frigate/detectors/plugins/tensorrt.py index 6502122929..b362f0b0b5 100644 --- a/frigate/detectors/plugins/tensorrt.py +++ b/frigate/detectors/plugins/tensorrt.py @@ -109,7 +109,9 @@ def _get_binding_dtype(self, binding): def _execute(self): if TRT_VERSION < 10: - return self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream) + return self.context.execute_async_v2( + bindings=self.bindings, stream_handle=self.stream + ) else: return self.context.execute_v2(self.bindings)
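
The series above gates every engine call on the TensorRT major version. The standalone sketch below exercises the same gate outside Frigate: it deserializes a prebuilt engine and prints each I/O tensor's role, shape, and numpy dtype through either the pre-10 binding API or the 10+ named-tensor API. It is illustrative only and not part of these patches; the engine path "./model.trt" and the helper names load_engine/describe_engine are placeholders, and the TensorRT 10 branch identifies inputs with ICudaEngine.get_tensor_mode() rather than the `binding == "input"` name check the patches rely on.

import tensorrt as trt

# Same major-version gate the patch series introduces.
TRT_VERSION = int(trt.__version__.split(".")[0])

TRT_LOGGER = trt.Logger(trt.Logger.INFO)


def load_engine(engine_path):
    """Deserialize a prebuilt TensorRT engine from disk."""
    with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())


def describe_engine(engine_path="./model.trt"):
    """Print each I/O tensor's role, shape and numpy dtype, dispatching on
    the TensorRT major version the same way the detector plugin now does."""
    engine = load_engine(engine_path)

    if TRT_VERSION < 10:
        # Legacy binding-index API (removed in TensorRT 10).
        names = [engine[i] for i in range(engine.num_bindings)]
        for name in names:
            role = "input" if engine.binding_is_input(name) else "output"
            shape = tuple(engine.get_binding_shape(name))
            dtype = trt.nptype(engine.get_binding_dtype(name))
            print(f"{name}: {role} shape={shape} dtype={dtype}")
    else:
        # Named-tensor API. get_tensor_mode() reports the tensor's role
        # directly, so this branch does not depend on the input tensor
        # literally being named "input".
        names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]
        for name in names:
            mode = engine.get_tensor_mode(name)
            role = "input" if mode == trt.TensorIOMode.INPUT else "output"
            shape = tuple(engine.get_tensor_shape(name))
            dtype = trt.nptype(engine.get_tensor_dtype(name))
            print(f"{name}: {role} shape={shape} dtype={dtype}")


if __name__ == "__main__":
    describe_engine()

The same TRT_VERSION gate drives the refactored _execute() helper in patch 3, which selects execute_async_v2() on older releases and execute_v2() on TensorRT 10.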