ai-dynamo · GuanLuo · Oct 30, 2025 · Oct 23, 2025 · Oct 24, 2025 · Oct 24, 2025
@@ -44,7 +44,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 }
 
 fn build_protos() -> Result<(), Box<dyn std::error::Error>> {
-    tonic_build::compile_protos("src/grpc/protos/kserve.proto")?;
+    tonic_build::configure()
+        .type_attribute(".", "#[derive(serde::Serialize,serde::Deserialize)]")
+        .compile_protos(&["kserve.proto"], &["src/grpc/protos"])?;
     Ok(())
 }
 

@@ -11,6 +11,8 @@ use crate::http::service::Metrics;
 use crate::http::service::metrics;
 
 use crate::discovery::ModelManager;
+use crate::local_model::runtime_config::ModelRuntimeConfig;
+use crate::protocols::tensor::TensorModelConfig;
 use crate::protocols::tensor::{NvCreateTensorRequest, NvCreateTensorResponse};
 use crate::request_template::RequestTemplate;
 use anyhow::Result;
@@ -38,6 +40,8 @@ use inference::{
     ModelMetadataRequest, ModelMetadataResponse, ModelStreamInferResponse,
 };
 
+use prost::Message;
+
 /// [gluo TODO] 'metrics' are for HTTP service and there is HTTP endpoint
 /// for it as part of HTTP service. Should we always start HTTP service up
 /// for non-inference?
@@ -157,6 +161,27 @@ impl KserveServiceConfigBuilder {
     }
 }
 
+#[allow(clippy::large_enum_variant)]
+enum Config {
+    Dynamo(TensorModelConfig),
+    Triton(ModelConfig),
+}
+
+impl Config {
+    fn from_runtime_config(runtime_config: &ModelRuntimeConfig) -> Result<Config, anyhow::Error> {
+        if let Some(tensor_model_config) = runtime_config.tensor_model_config.as_ref() {
+            if let Some(triton_model_config) = tensor_model_config.triton_model_config.as_ref() {
+                let model_config = ModelConfig::decode(triton_model_config.as_slice())?;
+                Ok(Config::Triton(model_config))
+            } else {
+                Ok(Config::Dynamo(tensor_model_config.clone()))
+            }
+        } else {
+            Err(anyhow::anyhow!("no model config is provided"))
+        }
+    }
+}
+
 #[tonic::async_trait]
 impl GrpcInferenceService for KserveService {
     async fn model_infer(
@@ -390,38 +415,76 @@ impl GrpcInferenceService for KserveService {
             .find(|card| request_model_name == &card.display_name)
         {
             if card.model_type.supports_tensor() {
-                if let Some(tensor_model_config) = card.runtime_config.tensor_model_config.as_ref()
-                {
-                    return Ok(Response::new(ModelMetadataResponse {
-                        name: tensor_model_config.name.clone(),
-                        versions: vec!["1".to_string()],
-                        platform: "dynamo".to_string(),
-                        inputs: tensor_model_config
-                            .inputs
-                            .iter()
-                            .map(|input| inference::model_metadata_response::TensorMetadata {
-                                name: input.name.clone(),
-                                datatype: input.data_type.to_string(),
-                                shape: input.shape.clone(),
-                            })
-                            .collect(),
-                        outputs: tensor_model_config
-                            .outputs
-                            .iter()
-                            .map(
-                                |output| inference::model_metadata_response::TensorMetadata {
-                                    name: output.name.clone(),
-                                    datatype: output.data_type.to_string(),
-                                    shape: output.shape.clone(),
-                                },
-                            )
-                            .collect(),
-                    }));
+                let config = Config::from_runtime_config(&card.runtime_config).map_err(|e| {
+                    Status::invalid_argument(format!(
+                        "Model '{}' has type Tensor but: {}",
+                        request_model_name, e
+                    ))
+                })?;
+                match config {
+                    Config::Triton(model_config) => {
+                        return Ok(Response::new(ModelMetadataResponse {
+                            name: model_config.name,
+                            versions: vec!["1".to_string()],
+                            platform: model_config.platform,
+                            inputs: model_config
+                                .input
+                                .iter()
+                                .map(|input| inference::model_metadata_response::TensorMetadata {
+                                    name: input.name.clone(),
+                                    datatype: match inference::DataType::try_from(input.data_type) {
+                                        Ok(dt) => dt.as_str_name().to_string(),
+                                        Err(_) => "TYPE_INVALID".to_string(),
+                                    },
+                                    shape: input.dims.clone(),
+                                })
+                                .collect(),
+                            outputs: model_config
+                                .output
+                                .iter()
+                                .map(
+                                    |output| inference::model_metadata_response::TensorMetadata {
+                                        name: output.name.clone(),
+                                        datatype: match inference::DataType::try_from(
+                                            output.data_type,
+                                        ) {
+                                            Ok(dt) => dt.as_str_name().to_string(),
+                                            Err(_) => "TYPE_INVALID".to_string(),
+                                        },
+                                        shape: output.dims.clone(),
+                                    },
+                                )
+                                .collect(),
+                        }));
+                    }
+                    Config::Dynamo(model_config) => {
+                        return Ok(Response::new(ModelMetadataResponse {
+                            name: model_config.name.clone(),
+                            versions: vec!["1".to_string()],
+                            platform: "dynamo".to_string(),
+                            inputs: model_config
+                                .inputs
+                                .iter()
+                                .map(|input| inference::model_metadata_response::TensorMetadata {
+                                    name: input.name.clone(),
+                                    datatype: input.data_type.to_string(),
+                                    shape: input.shape.clone(),
+                                })
+                                .collect(),
+                            outputs: model_config
+                                .outputs
+                                .iter()
+                                .map(
+                                    |output| inference::model_metadata_response::TensorMetadata {
+                                        name: output.name.clone(),
+                                        datatype: output.data_type.to_string(),
+                                        shape: output.shape.clone(),
+                                    },
+                                )
+                                .collect(),
+                        }));
+                    }
                 }
-                Err(Status::invalid_argument(format!(
-                    "Model '{}' has type Tensor but no model config is provided",
-                    request_model_name
-                )))?
             } else if card.model_type.supports_completions() {
                 return Ok(Response::new(ModelMetadataResponse {
                     name: card.display_name,
@@ -471,42 +534,50 @@ impl GrpcInferenceService for KserveService {
             .find(|card| request_model_name == &card.display_name)
         {
             if card.model_type.supports_tensor() {
-                if let Some(tensor_model_config) = card.runtime_config.tensor_model_config.as_ref()
-                {
-                    let model_config = ModelConfig {
-                        name: tensor_model_config.name.clone(),
-                        platform: "dynamo".to_string(),
-                        backend: "dynamo".to_string(),
-                        input: tensor_model_config
-                            .inputs
-                            .iter()
-                            .map(|input| ModelInput {
-                                name: input.name.clone(),
-                                data_type: input.data_type.to_kserve(),
-                                dims: input.shape.clone(),
-                                ..Default::default()
-                            })
-                            .collect(),
-                        output: tensor_model_config
-                            .outputs
-                            .iter()
-                            .map(|output| ModelOutput {
-                                name: output.name.clone(),
-                                data_type: output.data_type.to_kserve(),
-                                dims: output.shape.clone(),
-                                ..Default::default()
-                            })
-                            .collect(),
-                        ..Default::default()
-                    };
-                    return Ok(Response::new(ModelConfigResponse {
-                        config: Some(model_config.clone()),
-                    }));
+                let config = Config::from_runtime_config(&card.runtime_config).map_err(|e| {
+                    Status::invalid_argument(format!(
+                        "Model '{}' has type Tensor but: {}",
+                        request_model_name, e
+                    ))
+                })?;
+                match config {
+                    Config::Triton(model_config) => {
+                        return Ok(Response::new(ModelConfigResponse {
+                            config: Some(model_config),
+                        }));
+                    }
+                    Config::Dynamo(tensor_model_config) => {
+                        let model_config = ModelConfig {
+                            name: tensor_model_config.name.clone(),
+                            platform: "dynamo".to_string(),
+                            backend: "dynamo".to_string(),
+                            input: tensor_model_config
+                                .inputs
+                                .iter()
+                                .map(|input| ModelInput {
+                                    name: input.name.clone(),
+                                    data_type: input.data_type.to_kserve(),
+                                    dims: input.shape.clone(),
+                                    ..Default::default()
+                                })
+                                .collect(),
+                            output: tensor_model_config
+                                .outputs
+                                .iter()
+                                .map(|output| ModelOutput {
+                                    name: output.name.clone(),
+                                    data_type: output.data_type.to_kserve(),
+                                    dims: output.shape.clone(),
+                                    ..Default::default()
+                                })
+                                .collect(),
+                            ..Default::default()
+                        };
+                        return Ok(Response::new(ModelConfigResponse {
+                            config: Some(model_config.clone()),
+                        }));
+                    }
                 }
-                Err(Status::invalid_argument(format!(
-                    "Model '{}' has type Tensor but no model config is provided",
-                    request_model_name
-                )))?
             } else if card.model_type.supports_completions() {
                 let config = ModelConfig {
                     name: card.display_name,

@@ -124,11 +124,15 @@ pub struct TensorMetadata {
     pub parameters: Parameters,
 }
 
-#[derive(Serialize, Deserialize, Validate, Debug, Clone, PartialEq)]
+#[derive(Serialize, Deserialize, Validate, Debug, Clone, PartialEq, Default)]
 pub struct TensorModelConfig {
     pub name: String,
     pub inputs: Vec<TensorMetadata>,
     pub outputs: Vec<TensorMetadata>,
+    // Optional Triton model config in serialized protobuf string,
+    // if provided, it supersedes the basic model config defined above.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub triton_model_config: Option<Vec<u8>>,
 }
 
 #[derive(Serialize, Deserialize, Debug, Clone)]