In [1]:
# Step 1: Verify TensorRT and PyCUDA installation
try:
    import tensorrt as trt
    import pycuda.driver as cuda
    import pycuda.autoinit
    print("TensorRT and PyCUDA are installed.")
except ImportError as e:
    print("TensorRT and/or PyCUDA are not installed. Please install them before proceeding.")
    raise e

import os
import numpy as np
from Helper.calibrator import * 
import glob


TensorRT and PyCUDA are installed.


In [2]:
from Helper.onnx_konv import ONNXModelConverter

checkpoint_path = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/hyper_city/deeplabv3_resnet101_best_checkpoint.pkl"  
export_dir = "./onnx_models2"  # <-- Update this if desired!

model_name = "deeplabv3_resnet101"

# Create the converter object with the specified parameters
converter = ONNXModelConverter(
    checkpoint_path=checkpoint_path,
    model_name=model_name,
    width=2048,
    height=1024,
    input_shape=(1, 3, 520, 520),
    dynamic_batch=True,
    opset_version=13,
    output_dir=export_dir,
    skip_local_load=True
)

print("[DEBUG] Loading model from checkpoint...")
converter.load_model()
print("[DEBUG] Model loaded successfully.")

print("[DEBUG] Exporting FP32 ONNX model...")
fp32_onnx_path = converter.export_fp32()
print("FP32 ONNX model exported at:", fp32_onnx_path)


Initialisierung abgeschlossen. Gerät: cuda. Ausgabeordner: ./onnx_models2
[DEBUG] Loading model from checkpoint...


2025-02-10 14:45:56.195995: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Using CUDA GPU
Model loaded: deeplabv3_resnet101 | Device: cuda 
Skipping local .pth load logic (likely using external Ray checkpoint).
Modell deeplabv3_resnet101 erfolgreich geladen und auf cuda platziert.
[DEBUG] Model loaded successfully.
[DEBUG] Exporting FP32 ONNX model...
FP32 ONNX-Modell exportiert: ./onnx_models2/deeplabv3_resnet101_fp32.onnx
FP32 ONNX model exported at: ./onnx_models2/deeplabv3_resnet101_fp32.onnx


In [3]:
run = False
calibration_data_path = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/images" 
input_tensor_name = "input"

print("[DEBUG] Converting FP32 ONNX model to INT8 calibrated ONNX model...")
if run: 
    int8_calibrated_onnx_path = converter.quantize_int8_calibrated(
        fp32_onnx_path=fp32_onnx_path,
        calibration_data_path=calibration_data_path,
        input_tensor_name=input_tensor_name,
        max_samples=100
    )
    print("INT8 calibrated ONNX model exported at:", int8_calibrated_onnx_path)


[DEBUG] Converting FP32 ONNX model to INT8 calibrated ONNX model...


In [4]:
import os

# Update this path to your calibration images directory.
calibration_image_dir = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/images"

# Gather all calibration image paths (adjust the file extensions as needed)
calibration_image_paths = [
    os.path.join(calibration_image_dir, f)
    for f in os.listdir(calibration_image_dir)
    if f.lower().endswith((".png", ".jpg", ".jpeg"))
]

print(f"Found {len(calibration_image_paths)} calibration images.")

# Set batch size, input shape, and max_samples.
batch_size = 8
input_shape = (3, 520, 520)  # (C, H, W)
max_samples = 100
cache_file = "calibration.cache"

# Instantiate the calibrator.
calibrator = MyCalibrator(
    calibration_image_paths=calibration_image_paths,
    batch_size=batch_size,
    input_shape=input_shape,
    max_samples=max_samples,
    cache_file=cache_file
)
print("Calibrator instantiated successfully.")






Found 3475 calibration images.
[DEBUG] Processed calibration image 1/100: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/images/000001593_01.png
[DEBUG] Processed calibration image 2/100: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/images/00000997_01.png
[DEBUG] Processed calibration image 3/100: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/images/000001056_01.png
[DEBUG] Processed calibration image 4/100: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/images/000003355_01.png
[DEBUG] Processed calibration image 5/100: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/images/000002130_01.png
[DEBUG] Processed calibration image 6/100: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/images/00000171_01.png
[DEBUG] Processed calibration image 7/100: /home/jan/studienarbei

In [9]:
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine(onnx_file_path, engine_file_path, calibrator=None):
    """
    Build a TensorRT engine from an ONNX file.
    
    Parameters:
      - onnx_file_path: Path to the INT8-calibrated ONNX model.
      - engine_file_path: Where to save the resulting engine file.
      - calibrator: An instance of your custom calibrator (for INT8 mode), or None.
      
    Returns:
      - The path to the saved engine file, or None if engine build failed.
    """
    # Use explicit batch mode by setting the appropriate flag.
    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    
    # Create the builder, network, builder config, and ONNX parser.
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network(explicit_batch) as network, \
         builder.create_builder_config() as config, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:

        # Set the maximum workspace size (e.g., 1GB) using the new API:
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1GB

        # If a calibrator is provided, enable INT8 mode.
        if calibrator is not None:
            print("[DEBUG] Enabling INT8 mode with calibrator...")
            config.set_flag(trt.BuilderFlag.INT8)
            config.int8_calibrator = calibrator

        # Parse the ONNX model.
        with open(onnx_file_path, 'rb') as model_file:
            print(f"[DEBUG] Parsing ONNX model from: {onnx_file_path}")
            if not parser.parse(model_file.read()):
                print("Failed to parse the ONNX model. Errors:")
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None

        print("[DEBUG] Successfully parsed ONNX model. Building engine...")
        engine = builder.build_engine(network, config)
        if engine is None:
            print("Engine build failed!")
            return None

        # Serialize and save the engine.
        with open(engine_file_path, 'wb') as f:
            f.write(engine.serialize())
        print("[DEBUG] Engine built and saved successfully!")
        return engine_file_path

# Example usage:
onnx_int8_calibrated_onnx_path = "./onnx_models/deeplabv3_resnet101_fp32_int8_calibrated_fixed.onnx"
engine_output_path = "./engine_files/deeplabv3_resnet101_int8_calibrated.engine"
os.makedirs(os.path.dirname(engine_output_path), exist_ok=True)
engine_file = build_engine(onnx_int8_calibrated_onnx_path, engine_output_path, calibrator=calibrator)
print("TensorRT engine file exported at:", engine_file)


[DEBUG] Enabling INT8 mode with calibrator...
[DEBUG] Parsing ONNX model from: ./onnx_models/deeplabv3_resnet101_fp32_int8_calibrated_fixed.onnx
[02/10/2025-14:50:07] [TRT] [E] In node 15 with name: input_QuantizeLinear and operator: QuantizeLinear (QuantDequantLinearHelper): INVALID_NODE: Assertion failed: shiftIsAllZeros(zeroPoint): TensorRT only supports symmetric quantization. The zero point for the QuantizeLinear/DequantizeLinear operator must be all zeros.Failed to parse the ONNX model. Errors:
In node 15 with name: input_QuantizeLinear and operator: QuantizeLinear (QuantDequantLinearHelper): INVALID_NODE: Assertion failed: shiftIsAllZeros(zeroPoint): TensorRT only supports symmetric quantization. The zero point for the QuantizeLinear/DequantizeLinear operator must be all zeros.

TensorRT engine file exported at: None


  config.int8_calibrator = calibrator


In [7]:
import onnx
import numpy as np
from onnx import numpy_helper

# Update this path to your ONNX model.
model_path = "./onnx_models/deeplabv3_resnet101_fp32_int8_calibrated.onnx"
fixed_model_path = "./onnx_models/deeplabv3_resnet101_fp32_int8_calibrated_fixed.onnx"

# Load the ONNX model.
model = onnx.load(model_path)
graph = model.graph

def find_initializer(name):
    for init in graph.initializer:
        if init.name == name:
            return numpy_helper.to_array(init)
    return None

# Locate the offending dequantize node.
offending_node = None
for node in graph.node:
    if node.op_type == "DequantizeLinear" and node.name == "classifier.4.bias_DequantizeLinear":
        offending_node = node
        break

if offending_node is None:
    print("Offending node 'classifier.4.bias_DequantizeLinear' not found.")
else:
    print("Offending node found:")
    print(offending_node)
    
    # The dequantize node should have three inputs:
    #   input[0]: quantized bias (e.g., int8 array)
    #   input[1]: scale (float)
    #   input[2]: zero point (int8 or uint8)
    quant_bias = find_initializer(offending_node.input[0])
    scale = find_initializer(offending_node.input[1])
    zero_point = find_initializer(offending_node.input[2])
    
    if quant_bias is None or scale is None or zero_point is None:
        print("Could not locate one or more of the required initializers.")
    else:
        print("Quantized bias shape:", quant_bias.shape)
        print("Scale:", scale)
        print("Zero point:", zero_point)
        
        # Compute the dequantized bias.
        dequant_bias = (quant_bias.astype(np.float32) - zero_point.astype(np.float32)) * scale.astype(np.float32)
        print("Dequantized bias computed, shape:", dequant_bias.shape)
        
        # Create a new initializer for the dequantized bias.
        new_bias_initializer = numpy_helper.from_array(dequant_bias, name=offending_node.output[0])
        
        # Add the new initializer to the graph.
        graph.initializer.append(new_bias_initializer)
        
        # Remove the offending dequantize node from the graph.
        graph.node.remove(offending_node)
        print("Offending DequantizeLinear node removed.")
        
        # Save the fixed model.
        onnx.save(model, fixed_model_path)
        print(f"Fixed model saved at: {fixed_model_path}")


Offending node found:
input: "classifier.4.bias_quantized"
input: "classifier.4.bias_quantized_scale"
input: "classifier.4.bias_quantized_zero_point"
output: "classifier.4.bias"
name: "classifier.4.bias_DequantizeLinear"
op_type: "DequantizeLinear"

Quantized bias shape: (20,)
Scale: [7.932402e-05]
Zero point: 0
Dequantized bias computed, shape: (20,)
Offending DequantizeLinear node removed.
Fixed model saved at: ./onnx_models/deeplabv3_resnet101_fp32_int8_calibrated_fixed.onnx
