# Triton Model Configuration Generator

This notebook creates `config.pbtxt` files for DDColor models in TensorRT and ONNX formats for Triton Inference Server.

In [1]:
import os
import shutil
from pathlib import Path

## Define the base workspace path

In [2]:
# Root directory of each Triton model repository (one per backend)
workspace_path_trt = Path('/workspace/model_repository_trt')
workspace_path_onnx = Path('/workspace/model_repository_onnx')

# Per-model directory inside each repository
trt_model_path = workspace_path_trt / 'ddcolor_trt'
onnx_model_path = workspace_path_onnx / 'ddcolor_onnx'

# Create every directory that is still missing; existing ones are left untouched
for repo_dir in (workspace_path_trt, workspace_path_onnx, trt_model_path, onnx_model_path):
    repo_dir.mkdir(parents=True, exist_ok=True)

print(
    f"TensorRT model path: {trt_model_path}",
    f"ONNX model path: {onnx_model_path}",
    sep="\n",
)

TensorRT model path: /workspace/model_repository_trt/ddcolor_trt
ONNX model path: /workspace/model_repository_onnx/ddcolor_onnx


## Create config.pbtxt for TensorRT model

In [3]:
# Triton configuration text for the "ddcolor_trt" model (platform: tensorrt_plan)
trt_config = """name: "ddcolor_trt"
platform: "tensorrt_plan"
max_batch_size: 16
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 3, 512, 512 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, 512, 512 ]
  }
]
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]
dynamic_batching {
  preferred_batch_size: [ 1, 2, 4, 8, 12, 16 ]
  max_queue_delay_microseconds: 100
}
optimization {
  cuda {
    graphs: true
  }
}"""

# Write the configuration into the TensorRT model directory
trt_config_path = trt_model_path / 'config.pbtxt'
trt_config_path.write_text(trt_config)

# Echo the destination followed by the exact text that was written
print(f"TensorRT config written to: {trt_config_path}")
print("\nTensorRT Configuration:", "-" * 50, trt_config, sep="\n")

TensorRT config written to: /workspace/model_repository_trt/ddcolor_trt/config.pbtxt

TensorRT Configuration:
--------------------------------------------------
name: "ddcolor_trt"
platform: "tensorrt_plan"
max_batch_size: 16
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 3, 512, 512 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, 512, 512 ]
  }
]
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]
dynamic_batching {
  preferred_batch_size: [ 1, 2, 4, 8, 12, 16 ]
  max_queue_delay_microseconds: 100
}
optimization {
  cuda {
    graphs: true
  }
}


## Create config.pbtxt for ONNX model

In [4]:
# Triton configuration text for the "ddcolor_onnx" model (platform: onnxruntime_onnx),
# requesting the "tensorrt" GPU execution accelerator with precision_mode FP16
onnx_config = """name: "ddcolor_onnx"
platform: "onnxruntime_onnx"
max_batch_size: 16
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 3, 512, 512 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, 512, 512 ]
  }
]
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]
dynamic_batching {
  preferred_batch_size: [ 1, 2, 4, 8, 12, 16 ]
  max_queue_delay_microseconds: 100
}
optimization {
  execution_accelerators {
    gpu_execution_accelerator [
      {
        name: "tensorrt"
        parameters { key: "precision_mode" value: "FP16" }
        parameters { key: "max_workspace_size_bytes" value: "1073741824" }
      }
    ]
  }
}"""

# Write the configuration into the ONNX model directory
onnx_config_path = onnx_model_path / 'config.pbtxt'
onnx_config_path.write_text(onnx_config)

# Echo the destination followed by the exact text that was written
print(f"ONNX config written to: {onnx_config_path}")
print("\nONNX Configuration:", "-" * 50, onnx_config, sep="\n")

ONNX config written to: /workspace/model_repository_onnx/ddcolor_onnx/config.pbtxt

ONNX Configuration:
--------------------------------------------------
name: "ddcolor_onnx"
platform: "onnxruntime_onnx"
max_batch_size: 16
input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [ 3, 512, 512 ]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [ 2, 512, 512 ]
  }
]
instance_group [
  {
    count: 1
    kind: KIND_GPU
    gpus: [ 0 ]
  }
]
dynamic_batching {
  preferred_batch_size: [ 1, 2, 4, 8, 12, 16 ]
  max_queue_delay_microseconds: 100
}
optimization {
  execution_accelerators {
    gpu_execution_accelerator [
      {
        name: "tensorrt"
        parameters { key: "precision_mode" value: "FP16" }
        parameters { key: "max_workspace_size_bytes" value: "1073741824" }
      }
    ]
  }
}


## Verify the created files

In [5]:
# Report, for each generated config file, whether it is on disk and how large it is
print("\nVerifying created files:")
print("=" * 50)

for label, config_path in (("TensorRT", trt_config_path), ("ONNX", onnx_config_path)):
    if not config_path.exists():
        print(f"❌ {label} config not found at: {config_path}")
        continue
    print(f"✅ {label} config exists at: {config_path}")
    print(f"   File size: {config_path.stat().st_size} bytes")


Verifying created files:
✅ TensorRT config exists at: /workspace/model_repository_trt/ddcolor_trt/config.pbtxt
   File size: 466 bytes
✅ ONNX config exists at: /workspace/model_repository_onnx/ddcolor_onnx/config.pbtxt
   File size: 684 bytes


## Create version directories

Triton requires each model's files to live in a numeric version subdirectory (for example `1/`) inside the model directory. Run this cell to create the version directories for both models.

In [7]:
# Name of the version subdirectory created under each model directory
version = "1"

trt_version_path = trt_model_path / version
onnx_version_path = onnx_model_path / version

# Create both version directories; already existing ones are left as they are
for label, version_dir in (("TensorRT", trt_version_path), ("ONNX", onnx_version_path)):
    version_dir.mkdir(parents=True, exist_ok=True)
    print(f"Created {label} version directory: {version_dir}")

# Remind the reader where each model file belongs
print(
    "\nNote: Place your model files in these version directories:",
    f"  - TensorRT model (.plan file) → {trt_version_path}/",
    f"  - ONNX model (.onnx file) → {onnx_version_path}/",
    sep="\n",
)

Created TensorRT version directory: /workspace/model_repository_trt/ddcolor_trt/1
Created ONNX version directory: /workspace/model_repository_onnx/ddcolor_onnx/1

Note: Place your model files in these version directories:
  - TensorRT model (.plan file) → /workspace/model_repository_trt/ddcolor_trt/1/
  - ONNX model (.onnx file) → /workspace/model_repository_onnx/ddcolor_onnx/1/


## Copy model files

Copy model files to the respective directories

In [1]:
# Copy the exported model files into version directory "1" of each model.
# Each target directory is created here as well, so this cell still works
# when the version-directory cell above has not been run.
model_files = [
    (Path('/workspace/exported/model.plan'), trt_model_path / '1'),
    (Path('/workspace/exported/model.onnx'), onnx_model_path / '1'),
]

for source_file, target_dir in model_files:
    # Unlike `!cp`, a missing source raises here and stops the notebook
    # instead of printing an error and carrying on.
    if not source_file.is_file():
        raise FileNotFoundError(f"Exported model not found: {source_file}")
    target_dir.mkdir(parents=True, exist_ok=True)
    # Copying to a directory keeps the source file name (model.plan / model.onnx)
    shutil.copy(source_file, target_dir)
    print(f"Copied {source_file} → {target_dir}/")

In [3]:
!rm -rf `find -type d -name .ipynb_checkpoints`

## Summary

This notebook has created the following configuration files:

1. **TensorRT Configuration** (`/workspace/model_repository_trt/ddcolor_trt/config.pbtxt`):
   - Platform: `tensorrt_plan`
   - Max batch size: 16
   - Input: FP32, dims [3, 512, 512]
   - Output: FP32, dims [2, 512, 512]
   - GPU execution with CUDA graphs optimization

2. **ONNX Configuration** (`/workspace/model_repository_onnx/ddcolor_onnx/config.pbtxt`):
   - Platform: `onnxruntime_onnx`
   - Max batch size: 16
   - Input: FP32, dims [3, 512, 512]
   - Output: FP32, dims [2, 512, 512]
   - GPU execution with TensorRT acceleration (FP16 precision)

Both models are configured with:
- Dynamic batching with preferred sizes: [1, 2, 4, 8, 12, 16]
- Single GPU instance (GPU 0)
- Max queue delay: 100 microseconds