In [1]:
# Environment used for this reproduction:
#   Driver Version:   510.47.03
#   CUDA Version:     11.8
#   TensorRT Version: 8.6.1
# Show the contents of path.sh so the reader can see which paths are exported.
!cat path.sh
# NOTE(review): each `!` command presumably runs in its own short-lived
# subshell, so this `source` likely does not change the kernel's environment;
# path.sh probably has to be sourced before Jupyter is launched -- confirm.
!source path.sh

export CUDA_HOME=/data/cuda/11.8
export PATH=$CUDA_HOME/bin:${PATH}
export LD_LIBRARY_PATH=$CUDA_HOME/lib64:${LD_LIBRARY_PATH}

export TENSORRT_HOME=/data/cuda/TensorRT-8.6.1.6
export PATH=${TENSORRT_HOME}/bin:${PATH}
export LD_LIBRARY_PATH=${TENSORRT_HOME}/lib:${LD_LIBRARY_PATH}


In [2]:
# Define the bad case model
import torch
import torch.nn as nn

class CaseTest(nn.Module):
  """Minimal repro module whose output length depends on the input's values.

  The forward pass takes the largest element of ``x`` and uses it as the
  (exclusive) upper bound of ``torch.arange``.
  """

  def __init__(self) -> None:
    super().__init__()

  def forward(self, x: torch.Tensor):
    """Return ``torch.arange(x.max())``, i.e. ``[0, 1, ..., x.max() - 1]``."""
    return torch.arange(x.max())
  
# Build the repro module and switch it to inference mode before exporting.
model = CaseTest()
model.eval()
input_names = ["x"]

# Example input of shape (1, 10) holding the int64 values 0..9.
input_x = torch.arange(10, dtype=torch.int64).reshape(1, -1)  # B x T
print(input_x, input_x.shape)

# Export to ONNX; both axes of `x` are declared dynamic.
torch.onnx.export(
  model,
  (input_x,),
  "onnx_export_badcase.onnx",
  export_params=True,
  do_constant_folding=True,
  input_names=input_names,
  output_names=['y'],
  dynamic_axes={'x': {0: 'B', 1: 'T_0'}},
  verbose=False,
)

tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]) torch.Size([1, 10])


In [3]:
# Fold constants in the exported model with Polygraphy and write the result to
# onnx_export_badcase.folded.onnx.
!polygraphy surgeon sanitize onnx_export_badcase.onnx --fold-constants -o onnx_export_badcase.folded.onnx
# Print the folded model's topology at layer level.
!polygraphy inspect model onnx_export_badcase.folded.onnx --show layers


[W] 'colored' module is not installed, will not use colors when logging. To enable colors, please install the 'colored' module: python3 -m pip install colored
[I] RUNNING | Command: /data/k2/miniconda3/envs/tts_env/bin/polygraphy surgeon sanitize onnx_export_badcase.onnx --fold-constants -o onnx_export_badcase.folded.onnx
[I] Loading model: /data/k2/tts-latest/haoyue/onnx_export_badcase.onnx
[I] Original Model:
    Name: torch_jit | ONNX Opset: 14
    
    ---- 1 Graph Input(s) ----
    {x [dtype=int64, shape=('B', 'T_0')]}
    
    ---- 1 Graph Output(s) ----
    {y [dtype=int64, shape=('Range_4_o0__d0',)]}
    
    ---- 0 Initializer(s) ----
    
    ---- 5 Node(s) ----
    
[I] Folding Constants | Pass 1
[I]     Total Nodes | Original:     5, After Folding:     3 |     2 Nodes Folded
[I] Folding Constants | Pass 2
[I]     Total Nodes | Original:     3, After Folding:     3 |     0 Nodes Folded
[I] Saving ONNX model to: onnx_export_badcase.folded.onnx
[I] New Model:
    Name: torch_j

In [4]:
# Run this model in onnxruntime with cuda provider
import onnxruntime as ort
import numpy as np

# Execution providers in priority order: CUDA first, CPU as the fallback.
cuda_providers = [
      ('CUDAExecutionProvider', {}),
      ('CPUExecutionProvider', {}),
]
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
# Thread-pool sizes are session-level settings, not CPUExecutionProvider
# options, so they are set on SessionOptions where they actually take effect.
sess_options.intra_op_num_threads = 1
sess_options.inter_op_num_threads = 32
onnx_model = ort.InferenceSession("onnx_export_badcase.folded.onnx", sess_options, providers = cuda_providers)

# Feed the same tensor that was used for the export (input_x from the export cell).
model_inputs = {"x": input_x.numpy()}
print(model_inputs)
# Request every graph output by name.
output_names = [n.name for n in onnx_model.get_outputs()]
print(output_names)
model_output = onnx_model.run(output_names, model_inputs)
print(model_output)

{'x': array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])}
['y']
[array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64)]


In [7]:
# Run this model in onnxruntime with TensorRT provider
import onnxruntime as ort
import numpy as np

# Execution providers in priority order: TensorRT first, then CUDA, then CPU.
# (The list keeps the name `cuda_providers` used by the previous cell.)
cuda_providers = [
      ('TensorrtExecutionProvider', {}),
      ('CUDAExecutionProvider', {}),
      ('CPUExecutionProvider', {}),
]
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
# Thread-pool sizes are session-level settings, not CPUExecutionProvider
# options, so they are set on SessionOptions where they actually take effect.
sess_options.intra_op_num_threads = 1
sess_options.inter_op_num_threads = 32
onnx_model = ort.InferenceSession("onnx_export_badcase.folded.onnx", sess_options, providers = cuda_providers)

# Feed the same tensor that was used for the export (input_x from the export cell).
model_inputs = {"x": input_x.numpy()}
print(model_inputs)
# Request every graph output by name.
output_names = [n.name for n in onnx_model.get_outputs()]
print(output_names)
model_output = onnx_model.run(output_names, model_inputs)
print(model_output)

{'x': array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])}
['y']
[array([], dtype=int64)]




- Here we see that the model's output differs between the CUDA provider and the TensorRT provider.
- For CUDA provider, y = array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64)
- For TensorRT provider, y = array([], dtype=int64)

- It seems that there may be an error in the TensorRT graph optimization, so I ran trtexec to get the TensorRT engine layer info.
- Run the trtexec command outside the notebook.
```
!trtexec \
  --onnx=onnx_export_badcase.folded.onnx \
  --saveEngine=onnx_export_badcase.folded.trt \
  --nvtxMode=verbose \
  --exportLayerInfo=onnx_export_badcase.folded.json \
  --optShapes=x:1x77
```
- Get the graph topology of the TensorRT engine:

In [6]:
# Print the per-layer engine info that trtexec wrote via --exportLayerInfo.
!cat onnx_export_badcase.folded.json

{"Layers": [{
  "Name": "[trainStation1]",
  "LayerType": "TrainStation",
  "Inputs": [],
  "Outputs": [],
  "TacticValue": "0x0000000000000000",
  "StreamId": 0,
  "Metadata": ""
},{
  "Name": "/ReduceMax",
  "LayerType": "Reduce",
  "Inputs": [
  {
    "Name": "x",
    "Location": "Device",
    "Dimensions": [1,77],
    "Format/Datatype": "Row major linear Int32"
  }],
  "Outputs": [
  {
    "Name": "/ReduceMax_output_0",
    "Location": "Device",
    "Dimensions": [],
    "Format/Datatype": "Row major linear Int32"
  }],
  "ParameterType": "Reduce",
  "Operation": "MAX",
  "ReduceAxes": [1,1],
  "KeepDimensions": 0,
  "TacticValue": "0x0000000000000001",
  "StreamId": 0,
  "Metadata": "[ONNX Layer: /ReduceMax]"
},{
  "Name": "/ReduceMax_output_0[DevicetoShapeHostCopy]",
  "LayerType": "DeviceToShapeHost",
  "Inputs": [
  {
    "Name": "/ReduceMax_output_0",
    "Location": "Device",
    "Dimensions": [],
    "Format/Datatype": "Row major linear Int32"
  }],
  "Outputs": [],
  "Tacti

- For the layer `/Range`, there are no inputs ...  
```
{
  "Name": "/Range",
  "LayerType": "Fill",
  "Inputs": [],
  "Outputs": [
  {
    "Name": "y",
    "Location": "Device",
    "Dimensions": [0],
    "Format/Datatype": "Row major linear Int32"
  }],
  "ParameterType": "Fill",
  "Op": "LINSPACE",
  "dimension": [-2147483647],
  "alpha": 0,
  "beta": 1,
  "TacticValue": "0x0000000000000000",
  "StreamId": 0,
  "Metadata": "[ONNX Layer: /Range]"
}
```
- Is there a bug in TensorRT's graph building?