diff --git a/Cargo.toml b/Cargo.toml
index 2a4b4d8..4e0c516 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -69,7 +69,9 @@ logforth = { version = "0.29.1", features = [
     "diagnostic-fastrace",
     "append-fastrace",
 ] }
-opentelemetry-semantic-conventions = "0.31.0"
+opentelemetry-semantic-conventions = { version = "0.31.0", features = [
+    "semconv_experimental",
+] }
 metrics = "0.24.2"
 metrics-exporter-otel = "0.3.0"
 pin-project = "1.1.10"
diff --git a/src/proxy/handlers/chat_completions/mod.rs b/src/proxy/handlers/chat_completions/mod.rs
index ddb2065..c729973 100644
--- a/src/proxy/handlers/chat_completions/mod.rs
+++ b/src/proxy/handlers/chat_completions/mod.rs
@@ -13,6 +13,7 @@ use axum::{
     extract::State,
     response::{IntoResponse, Response},
 };
 use fastrace::prelude::{Event as TraceEvent, *};
 use log::error;
+use opentelemetry_semantic_conventions::attribute::GEN_AI_RESPONSE_FINISH_REASONS;
 use span_attributes::{
     StreamOutputCollector, apply_span_properties, chunk_span_properties,
     request_span_properties, response_span_properties, usage_span_properties,
@@ -72,7 +73,7 @@
     let provider_instance = create_provider_instance(gateway.as_ref(), &provider)?;
     let provider_base_url = provider_instance.effective_base_url().ok();
 
-    let span = Span::enter_with_local_parent("aisix.llm.chat_completion");
+    let span = Span::enter_with_local_parent("aisix.llm.chat_completions");
     apply_span_properties(
         &span,
         request_span_properties(
@@ -197,7 +198,7 @@ async fn handle_stream_request(
             properties
                 .iter()
                 .filter(|(key, _)| {
-                    key == "gen_ai.response.finish_reasons"
+                    key == GEN_AI_RESPONSE_FINISH_REASONS
                         || key == "llm.finish_reason"
                         || key == "llm.token_count.completion_details.reasoning"
                 })
diff --git a/src/proxy/handlers/chat_completions/span_attributes/telemetry.rs b/src/proxy/handlers/chat_completions/span_attributes/telemetry.rs
index e40171e..7801e8b 100644
--- a/src/proxy/handlers/chat_completions/span_attributes/telemetry.rs
+++ b/src/proxy/handlers/chat_completions/span_attributes/telemetry.rs
@@ -1,3 +1,11 @@
+use opentelemetry_semantic_conventions::attribute::{
+    GEN_AI_OPERATION_NAME, GEN_AI_OUTPUT_TYPE, GEN_AI_REQUEST_CHOICE_COUNT,
+    GEN_AI_REQUEST_FREQUENCY_PENALTY, GEN_AI_REQUEST_MAX_TOKENS, GEN_AI_REQUEST_MODEL,
+    GEN_AI_REQUEST_PRESENCE_PENALTY, GEN_AI_REQUEST_SEED, GEN_AI_REQUEST_STOP_SEQUENCES,
+    GEN_AI_REQUEST_TEMPERATURE, GEN_AI_REQUEST_TOP_K, GEN_AI_REQUEST_TOP_P, GEN_AI_RESPONSE_ID,
+    GEN_AI_RESPONSE_MODEL, GEN_AI_USAGE_INPUT_TOKENS, GEN_AI_USAGE_OUTPUT_TOKENS, SERVER_ADDRESS,
+    SERVER_PORT, USER_ID,
+};
 use reqwest::Url;
 use serde_json::{Map, Value};
 
@@ -35,7 +43,7 @@ pub(in crate::proxy::handlers::chat_completions) fn request_span_properties(
         .map(message_view_from_chat_message)
         .collect();
     let mut properties = vec![
-        ("gen_ai.operation.name".into(), "chat".into()),
+        (GEN_AI_OPERATION_NAME.into(), "chat".into()),
         ("openinference.span.kind".into(), "LLM".into()),
         (
             "gen_ai.provider.name".into(),
@@ -45,7 +53,7 @@
             "llm.system".into(),
             provider_semantics.llm_system.to_string(),
         ),
-        ("gen_ai.request.model".into(), request.model.clone()),
+        (GEN_AI_REQUEST_MODEL.into(), request.model.clone()),
     ];
 
     if let Some(llm_provider) = provider_semantics.llm_provider {
@@ -53,46 +61,43 @@
     }
 
     if let Some(choice_count) = request.n.filter(|count| *count != 1) {
-        properties.push((
-            "gen_ai.request.choice.count".into(),
-            choice_count.to_string(),
-        ));
+        properties.push((GEN_AI_REQUEST_CHOICE_COUNT.into(), choice_count.to_string()));
     }
 
     if let Some(seed) = request.seed {
-        properties.push(("gen_ai.request.seed".into(), seed.to_string()));
+        properties.push((GEN_AI_REQUEST_SEED.into(), seed.to_string()));
     }
 
     if let Some(max_tokens) = request.max_completion_tokens.or(request.max_tokens) {
-        properties.push(("gen_ai.request.max_tokens".into(), max_tokens.to_string()));
+        properties.push((GEN_AI_REQUEST_MAX_TOKENS.into(), max_tokens.to_string()));
     }
 
     if let Some(value) = request.frequency_penalty {
-        properties.push(("gen_ai.request.frequency_penalty".into(), value.to_string()));
+        properties.push((GEN_AI_REQUEST_FREQUENCY_PENALTY.into(), value.to_string()));
     }
 
     if let Some(value) = request.presence_penalty {
-        properties.push(("gen_ai.request.presence_penalty".into(), value.to_string()));
+        properties.push((GEN_AI_REQUEST_PRESENCE_PENALTY.into(), value.to_string()));
     }
 
     if let Some(value) = request.temperature {
-        properties.push(("gen_ai.request.temperature".into(), value.to_string()));
+        properties.push((GEN_AI_REQUEST_TEMPERATURE.into(), value.to_string()));
     }
 
     if let Some(value) = request.top_p {
-        properties.push(("gen_ai.request.top_p".into(), value.to_string()));
+        properties.push((GEN_AI_REQUEST_TOP_P.into(), value.to_string()));
     }
 
     if let Some(value) = numeric_extra_to_string(request.extra.get("top_k")) {
-        properties.push(("gen_ai.request.top_k".into(), value));
+        properties.push((GEN_AI_REQUEST_TOP_K.into(), value));
     }
 
     if let Some(value) = stop_sequences_json(request.stop.as_ref()) {
-        properties.push(("gen_ai.request.stop_sequences".into(), value));
+        properties.push((GEN_AI_REQUEST_STOP_SEQUENCES.into(), value));
     }
 
     if let Some(value) = response_format_output_type(request.response_format.as_ref()) {
-        properties.push(("gen_ai.output.type".into(), value.to_string()));
+        properties.push((GEN_AI_OUTPUT_TYPE.into(), value.to_string()));
     }
 
     if let Some(value) = request_invocation_parameters(request) {
@@ -100,7 +105,7 @@
     }
 
     if let Some(user_id) = request.user.as_ref().filter(|user_id| !user_id.is_empty()) {
-        properties.push(("user.id".into(), user_id.clone()));
+        properties.push((USER_ID.into(), user_id.clone()));
     }
 
     append_openinference_message_properties(&mut properties, "llm.input_messages", &input_messages);
@@ -119,10 +124,10 @@
 
     if let Some(base_url) = base_url {
         if let Some(address) = base_url.host_str() {
-            properties.push(("server.address".into(), address.to_string()));
+            properties.push((SERVER_ADDRESS.into(), address.to_string()));
         }
         if let Some(port) = base_url.port_or_known_default() {
-            properties.push(("server.port".into(), port.to_string()));
+            properties.push((SERVER_PORT.into(), port.to_string()));
         }
     }
 
@@ -135,8 +140,8 @@ pub(in crate::proxy::handlers::chat_completions) fn response_span_properties(
 ) -> Vec<(String, String)> {
     let output_messages = response_output_message_views(response);
     let mut properties = vec![
-        ("gen_ai.response.id".into(), response.id.clone()),
-        ("gen_ai.response.model".into(), response.model.clone()),
+        (GEN_AI_RESPONSE_ID.into(), response.id.clone()),
+        (GEN_AI_RESPONSE_MODEL.into(), response.model.clone()),
         ("llm.model_name".into(), response.model.clone()),
     ];
 
@@ -164,11 +169,11 @@ pub(in crate::proxy::handlers::chat_completions) fn chunk_span_properties(
     let mut properties = Vec::new();
 
     if !chunk.id.is_empty() {
properties.push(("gen_ai.response.id".into(), chunk.id.clone())); + properties.push((GEN_AI_RESPONSE_ID.into(), chunk.id.clone())); } if !chunk.model.is_empty() { - properties.push(("gen_ai.response.model".into(), chunk.model.clone())); + properties.push((GEN_AI_RESPONSE_MODEL.into(), chunk.model.clone())); properties.push(("llm.model_name".into(), chunk.model.clone())); } @@ -297,13 +302,13 @@ fn append_response_usage_properties( if usage.input_tokens.is_none() { let input_tokens = raw_usage.prompt_tokens.to_string(); - properties.push(("gen_ai.usage.input_tokens".into(), input_tokens.clone())); + properties.push((GEN_AI_USAGE_INPUT_TOKENS.into(), input_tokens.clone())); properties.push(("llm.token_count.prompt".into(), input_tokens)); } if usage.output_tokens.is_none() { let output_tokens = raw_usage.completion_tokens.to_string(); - properties.push(("gen_ai.usage.output_tokens".into(), output_tokens.clone())); + properties.push((GEN_AI_USAGE_OUTPUT_TOKENS.into(), output_tokens.clone())); properties.push(("llm.token_count.completion".into(), output_tokens)); } diff --git a/src/proxy/handlers/embeddings/mod.rs b/src/proxy/handlers/embeddings/mod.rs index bdd8e1a..d2b5c19 100644 --- a/src/proxy/handlers/embeddings/mod.rs +++ b/src/proxy/handlers/embeddings/mod.rs @@ -1,3 +1,4 @@ +mod span_attributes; mod types; use std::time::Duration; @@ -7,7 +8,9 @@ use axum::{ extract::State, response::{IntoResponse, Response}, }; +use fastrace::prelude::*; use log::error; +use span_attributes::{request_span_properties, response_span_properties}; pub use types::EmbeddingError; use crate::{ @@ -23,8 +26,9 @@ use crate::{ AppState, hooks::{self, RequestContext}, provider::create_provider_instance, + utils::trace::span_attributes::apply_span_properties, }, - utils::future::maybe_timeout, + utils::future::{WithSpan, maybe_timeout}, }; fn embedding_usage(response: &EmbeddingResponse) -> Usage { @@ -38,7 +42,6 @@ fn embedding_usage(response: &EmbeddingResponse) -> Usage { } } -#[fastrace::trace] pub async fn embeddings( State(state): State, mut request_ctx: RequestContext, @@ -61,14 +64,32 @@ pub async fn embeddings( GatewayError::Internal(format!("provider {} not found", model.provider_id)) })?; let provider_instance = create_provider_instance(gateway.as_ref(), &provider)?; + let provider_base_url = provider_instance.effective_base_url().ok(); let timeout = model.timeout.map(Duration::from_millis); // Replace request model name with real model name request_data.model = model.model.clone(); - match maybe_timeout(timeout, gateway.embed(&request_data, &provider_instance)).await { + let span = Span::enter_with_local_parent("aisix.llm.embeddings"); + apply_span_properties( + &span, + request_span_properties( + &request_data, + provider_instance.def.as_ref(), + provider_base_url.as_ref(), + ), + ); + + let (response, span) = (WithSpan { + inner: maybe_timeout(timeout, gateway.embed(&request_data, &provider_instance)), + span: Some(span), + }) + .await; + + match response { Ok(Ok(response)) => { let usage = embedding_usage(&response); + span.add_properties(|| response_span_properties(&response, &usage)); let mut resp = Json(response).into_response(); if let Err(err) = hooks::rate_limit::post_check(&mut request_ctx, &usage).await { error!("Rate limit post_check error: {}", err); @@ -79,9 +100,13 @@ pub async fn embeddings( Ok(resp) } Ok(Err(err)) => { + span.add_property(|| ("error.type", "gateway_error")); error!("Error generating embeddings: {}", err); Err(EmbeddingError::GatewayError(err)) } - Err(err) => 
Err(EmbeddingError::Timeout(err)), + Err(err) => { + span.add_property(|| ("error.type", "timeout")); + Err(EmbeddingError::Timeout(err)) + } } } diff --git a/src/proxy/handlers/embeddings/span_attributes.rs b/src/proxy/handlers/embeddings/span_attributes.rs new file mode 100644 index 0000000..5f1d204 --- /dev/null +++ b/src/proxy/handlers/embeddings/span_attributes.rs @@ -0,0 +1,303 @@ +use opentelemetry_semantic_conventions::attribute::{ + GEN_AI_OPERATION_NAME, GEN_AI_REQUEST_ENCODING_FORMATS, GEN_AI_REQUEST_MODEL, + GEN_AI_RESPONSE_MODEL, SERVER_ADDRESS, SERVER_PORT, USER_ID, +}; +use reqwest::Url; +use serde_json::{Map, Value}; + +use crate::{ + gateway::{ + traits::ProviderCapabilities, + types::{ + common::Usage, + embed::{EmbeddingRequest, EmbeddingResponse, OneOrMany}, + }, + }, + proxy::utils::trace::span_attributes::append_usage_properties, +}; + +pub(super) fn request_span_properties( + request: &EmbeddingRequest, + provider: &dyn ProviderCapabilities, + base_url: Option<&Url>, +) -> Vec<(String, String)> { + let provider_semantics = provider.semantic_conventions(); + let input_texts = request_input_texts(request); + let mut properties = vec![ + (GEN_AI_OPERATION_NAME.into(), "embeddings".into()), + ("openinference.span.kind".into(), "EMBEDDING".into()), + ( + "gen_ai.provider.name".into(), + provider_semantics.gen_ai_provider_name.to_string(), + ), + (GEN_AI_REQUEST_MODEL.into(), request.model.clone()), + ("embedding.model_name".into(), request.model.clone()), + ("input.mime_type".into(), "application/json".into()), + ]; + + if let Some(value) = encoding_formats_json(request.encoding_format.as_deref()) { + properties.push((GEN_AI_REQUEST_ENCODING_FORMATS.into(), value)); + } + + if let Some(value) = embedding_invocation_parameters(request) { + properties.push(("embedding.invocation_parameters".into(), value)); + } + + if let Ok(value) = serde_json::to_string(&request.input) { + properties.push(("input.value".into(), value)); + } + + if let Some(user_id) = request.user.as_ref().filter(|user_id| !user_id.is_empty()) { + properties.push((USER_ID.into(), user_id.clone())); + } + + for (index, text) in input_texts.iter().enumerate() { + properties.push(( + format!("embedding.embeddings.{index}.embedding.text"), + text.clone(), + )); + } + + if let Some(base_url) = base_url { + if let Some(address) = base_url.host_str() { + properties.push((SERVER_ADDRESS.into(), address.to_string())); + } + if let Some(port) = base_url.port_or_known_default() { + properties.push((SERVER_PORT.into(), port.to_string())); + } + } + + properties +} + +pub(super) fn response_span_properties( + response: &EmbeddingResponse, + usage: &Usage, +) -> Vec<(String, String)> { + let mut properties = vec![ + (GEN_AI_RESPONSE_MODEL.into(), response.model.clone()), + ("output.mime_type".into(), "application/json".into()), + ]; + + if let Some(first_embedding) = response.data.first() { + properties.push(( + "gen_ai.embeddings.dimension.count".into(), + first_embedding.embedding.len().to_string(), + )); + } + + if let Ok(value) = serde_json::to_string(response) { + properties.push(("output.value".into(), value)); + } + + for (index, data) in response.data.iter().enumerate() { + if let Ok(value) = serde_json::to_string(&data.embedding) { + properties.push(( + format!("embedding.embeddings.{index}.embedding.vector"), + value, + )); + } + } + + append_usage_properties(&mut properties, usage); + properties +} + +fn request_input_texts(request: &EmbeddingRequest) -> Vec { + match &request.input { + OneOrMany::One(value) 
=> vec![value.clone()], + OneOrMany::Many(values) => values.clone(), + } +} + +fn encoding_formats_json(encoding_format: Option<&str>) -> Option { + let encoding_format = encoding_format?.trim(); + if encoding_format.is_empty() { + return None; + } + + serde_json::to_string(&vec![encoding_format]).ok() +} + +fn embedding_invocation_parameters(request: &EmbeddingRequest) -> Option { + let mut params = Map::new(); + params.insert("model".into(), Value::String(request.model.clone())); + + if let Some(value) = request.dimensions { + params.insert("dimensions".into(), Value::from(value)); + } + + if let Some(value) = request.encoding_format.as_ref() { + params.insert("encoding_format".into(), Value::String(value.clone())); + } + + serde_json::to_string(&Value::Object(params)).ok() +} + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + use reqwest::Url; + use serde_json::{Value, json}; + + use super::{request_span_properties, response_span_properties}; + use crate::gateway::{ + providers::openai::OpenAIDef, + types::{ + common::Usage, + embed::{EmbeddingRequest, EmbeddingResponse}, + }, + }; + + fn property_value<'a>(properties: &'a [(String, String)], key: &str) -> Option<&'a str> { + properties + .iter() + .find(|(property_key, _)| property_key == key) + .map(|(_, value)| value.as_str()) + } + + #[test] + fn request_span_properties_follow_embedding_semantic_conventions() { + let request: EmbeddingRequest = serde_json::from_value(json!({ + "model": "text-embedding-3-large", + "input": ["hello", "world"], + "dimensions": 256, + "encoding_format": "float", + "user": "user-123" + })) + .unwrap(); + let provider = OpenAIDef; + let base_url = Url::parse("https://api.openai.com/v1").unwrap(); + + let properties = request_span_properties(&request, &provider, Some(&base_url)); + + assert_eq!( + property_value(&properties, "gen_ai.operation.name"), + Some("embeddings") + ); + assert_eq!( + property_value(&properties, "openinference.span.kind"), + Some("EMBEDDING") + ); + assert_eq!( + property_value(&properties, "gen_ai.provider.name"), + Some("openai") + ); + assert_eq!( + property_value(&properties, "gen_ai.request.model"), + Some("text-embedding-3-large") + ); + assert_eq!( + property_value(&properties, "embedding.model_name"), + Some("text-embedding-3-large") + ); + assert_eq!( + property_value(&properties, "input.mime_type"), + Some("application/json") + ); + assert_eq!(property_value(&properties, "user.id"), Some("user-123")); + assert_eq!( + property_value(&properties, "embedding.embeddings.0.embedding.text"), + Some("hello") + ); + assert_eq!( + property_value(&properties, "embedding.embeddings.1.embedding.text"), + Some("world") + ); + assert_eq!( + property_value(&properties, "server.address"), + Some("api.openai.com") + ); + assert_eq!(property_value(&properties, "server.port"), Some("443")); + assert_eq!(property_value(&properties, "llm.system"), None); + assert_eq!(property_value(&properties, "llm.provider"), None); + + let input_value: Value = + serde_json::from_str(property_value(&properties, "input.value").unwrap()).unwrap(); + assert_eq!(input_value, json!(["hello", "world"])); + + let encoding_formats: Value = serde_json::from_str( + property_value(&properties, "gen_ai.request.encoding_formats").unwrap(), + ) + .unwrap(); + assert_eq!(encoding_formats, json!(["float"])); + + let invocation_parameters: Value = serde_json::from_str( + property_value(&properties, "embedding.invocation_parameters").unwrap(), + ) + .unwrap(); + assert_eq!( + invocation_parameters, + json!({ + 
"model": "text-embedding-3-large", + "dimensions": 256, + "encoding_format": "float" + }) + ); + } + + #[test] + fn response_span_properties_include_vectors_and_usage() { + let response: EmbeddingResponse = serde_json::from_value(json!({ + "object": "list", + "data": [{ + "object": "embedding", + "embedding": [0.1, 0.2], + "index": 0 + }, { + "object": "embedding", + "embedding": [0.3, 0.4], + "index": 1 + }], + "model": "text-embedding-3-large", + "usage": { + "prompt_tokens": 8, + "total_tokens": 8 + } + })) + .unwrap(); + let usage = Usage { + input_tokens: Some(8), + total_tokens: Some(8), + ..Default::default() + }; + + let properties = response_span_properties(&response, &usage); + + assert_eq!( + property_value(&properties, "gen_ai.response.model"), + Some("text-embedding-3-large") + ); + assert_eq!( + property_value(&properties, "output.mime_type"), + Some("application/json") + ); + assert_eq!( + property_value(&properties, "gen_ai.embeddings.dimension.count"), + Some("2") + ); + assert_eq!( + property_value(&properties, "llm.token_count.prompt"), + Some("8") + ); + assert_eq!( + property_value(&properties, "llm.token_count.total"), + Some("8") + ); + + let output_value: Value = + serde_json::from_str(property_value(&properties, "output.value").unwrap()).unwrap(); + assert_eq!(output_value["model"], "text-embedding-3-large"); + + let vector0: Value = serde_json::from_str( + property_value(&properties, "embedding.embeddings.0.embedding.vector").unwrap(), + ) + .unwrap(); + let vector1: Value = serde_json::from_str( + property_value(&properties, "embedding.embeddings.1.embedding.vector").unwrap(), + ) + .unwrap(); + assert_eq!(vector0, json!([0.1, 0.2])); + assert_eq!(vector1, json!([0.3, 0.4])); + } +} diff --git a/src/proxy/handlers/messages/span_attributes/telemetry.rs b/src/proxy/handlers/messages/span_attributes/telemetry.rs index 9700877..2ec7782 100644 --- a/src/proxy/handlers/messages/span_attributes/telemetry.rs +++ b/src/proxy/handlers/messages/span_attributes/telemetry.rs @@ -1,3 +1,9 @@ +use opentelemetry_semantic_conventions::attribute::{ + GEN_AI_OPERATION_NAME, GEN_AI_REQUEST_MAX_TOKENS, GEN_AI_REQUEST_MODEL, + GEN_AI_REQUEST_STOP_SEQUENCES, GEN_AI_REQUEST_TEMPERATURE, GEN_AI_REQUEST_TOP_K, + GEN_AI_REQUEST_TOP_P, GEN_AI_RESPONSE_ID, GEN_AI_RESPONSE_MODEL, GEN_AI_USAGE_INPUT_TOKENS, + GEN_AI_USAGE_OUTPUT_TOKENS, SERVER_ADDRESS, SERVER_PORT, USER_ID, +}; use reqwest::Url; use serde_json::{Map, Value}; @@ -30,7 +36,7 @@ pub(in crate::proxy::handlers::messages) fn request_span_properties( let provider_semantics = provider.semantic_conventions(); let input_messages = request_input_message_views(request); let mut properties = vec![ - ("gen_ai.operation.name".into(), "chat".into()), + (GEN_AI_OPERATION_NAME.into(), "chat".into()), ("openinference.span.kind".into(), "LLM".into()), ( "gen_ai.provider.name".into(), @@ -40,9 +46,9 @@ pub(in crate::proxy::handlers::messages) fn request_span_properties( "llm.system".into(), provider_semantics.llm_system.to_string(), ), - ("gen_ai.request.model".into(), request.model.clone()), + (GEN_AI_REQUEST_MODEL.into(), request.model.clone()), ( - "gen_ai.request.max_tokens".into(), + GEN_AI_REQUEST_MAX_TOKENS.into(), request.max_tokens.to_string(), ), ]; @@ -52,19 +58,19 @@ pub(in crate::proxy::handlers::messages) fn request_span_properties( } if let Some(value) = request.temperature { - properties.push(("gen_ai.request.temperature".into(), value.to_string())); + properties.push((GEN_AI_REQUEST_TEMPERATURE.into(), value.to_string())); } if 
let Some(value) = request.top_p { - properties.push(("gen_ai.request.top_p".into(), value.to_string())); + properties.push((GEN_AI_REQUEST_TOP_P.into(), value.to_string())); } if let Some(value) = request.top_k { - properties.push(("gen_ai.request.top_k".into(), value.to_string())); + properties.push((GEN_AI_REQUEST_TOP_K.into(), value.to_string())); } if let Some(value) = stop_sequences_json(request.stop_sequences.as_deref()) { - properties.push(("gen_ai.request.stop_sequences".into(), value)); + properties.push((GEN_AI_REQUEST_STOP_SEQUENCES.into(), value)); } if let Some(value) = request_invocation_parameters(request) { @@ -77,7 +83,7 @@ pub(in crate::proxy::handlers::messages) fn request_span_properties( .and_then(|metadata| metadata.user_id.as_ref()) .filter(|user_id| !user_id.is_empty()) { - properties.push(("user.id".into(), user_id.clone())); + properties.push((USER_ID.into(), user_id.clone())); } append_openinference_message_properties(&mut properties, "llm.input_messages", &input_messages); @@ -96,10 +102,10 @@ pub(in crate::proxy::handlers::messages) fn request_span_properties( if let Some(base_url) = base_url { if let Some(address) = base_url.host_str() { - properties.push(("server.address".into(), address.to_string())); + properties.push((SERVER_ADDRESS.into(), address.to_string())); } if let Some(port) = base_url.port_or_known_default() { - properties.push(("server.port".into(), port.to_string())); + properties.push((SERVER_PORT.into(), port.to_string())); } } @@ -112,8 +118,8 @@ pub(in crate::proxy::handlers::messages) fn response_span_properties( ) -> Vec<(String, String)> { let output_messages = response_output_message_views(response); let mut properties = vec![ - ("gen_ai.response.id".into(), response.id.clone()), - ("gen_ai.response.model".into(), response.model.clone()), + (GEN_AI_RESPONSE_ID.into(), response.id.clone()), + (GEN_AI_RESPONSE_MODEL.into(), response.model.clone()), ("llm.model_name".into(), response.model.clone()), ]; @@ -142,8 +148,8 @@ pub(in crate::proxy::handlers::messages) fn chunk_span_properties( match event { AnthropicStreamEvent::MessageStart { message } => { - properties.push(("gen_ai.response.id".into(), message.id.clone())); - properties.push(("gen_ai.response.model".into(), message.model.clone())); + properties.push((GEN_AI_RESPONSE_ID.into(), message.id.clone())); + properties.push((GEN_AI_RESPONSE_MODEL.into(), message.model.clone())); properties.push(("llm.model_name".into(), message.model.clone())); append_message_start_usage_properties(&mut properties, &message.usage); } @@ -224,13 +230,13 @@ fn append_response_usage_properties( if usage.input_tokens.is_none() { let input_tokens = raw_input_tokens.to_string(); - properties.push(("gen_ai.usage.input_tokens".into(), input_tokens.clone())); + properties.push((GEN_AI_USAGE_INPUT_TOKENS.into(), input_tokens.clone())); properties.push(("llm.token_count.prompt".into(), input_tokens)); } if usage.output_tokens.is_none() { let output_tokens = raw_usage.output_tokens.to_string(); - properties.push(("gen_ai.usage.output_tokens".into(), output_tokens.clone())); + properties.push((GEN_AI_USAGE_OUTPUT_TOKENS.into(), output_tokens.clone())); properties.push(("llm.token_count.completion".into(), output_tokens)); } @@ -304,13 +310,13 @@ fn append_message_usage_values( if let Some(input_tokens) = input_tokens { let input_tokens = input_tokens.to_string(); - properties.push(("gen_ai.usage.input_tokens".into(), input_tokens.clone())); + properties.push((GEN_AI_USAGE_INPUT_TOKENS.into(), 
input_tokens.clone())); properties.push(("llm.token_count.prompt".into(), input_tokens)); } if let Some(output_tokens) = output_tokens { let output_tokens = output_tokens.to_string(); - properties.push(("gen_ai.usage.output_tokens".into(), output_tokens.clone())); + properties.push((GEN_AI_USAGE_OUTPUT_TOKENS.into(), output_tokens.clone())); properties.push(("llm.token_count.completion".into(), output_tokens)); } diff --git a/src/proxy/utils/trace/span_attributes.rs b/src/proxy/utils/trace/span_attributes.rs index 4a73e59..d298422 100644 --- a/src/proxy/utils/trace/span_attributes.rs +++ b/src/proxy/utils/trace/span_attributes.rs @@ -1,4 +1,7 @@ use fastrace::prelude::Span; +use opentelemetry_semantic_conventions::attribute::{ + GEN_AI_RESPONSE_FINISH_REASONS, GEN_AI_USAGE_INPUT_TOKENS, GEN_AI_USAGE_OUTPUT_TOKENS, +}; use crate::gateway::types::common::Usage; @@ -26,7 +29,7 @@ pub(crate) fn append_finish_reason_properties( } properties.push(( - "gen_ai.response.finish_reasons".into(), + GEN_AI_RESPONSE_FINISH_REASONS.into(), serde_json::to_string(&finish_reasons).unwrap_or_default(), )); @@ -38,13 +41,13 @@ pub(crate) fn append_finish_reason_properties( pub(crate) fn append_usage_properties(properties: &mut Vec<(String, String)>, usage: &Usage) { if let Some(input_tokens) = usage.input_tokens { let input_tokens = input_tokens.to_string(); - properties.push(("gen_ai.usage.input_tokens".into(), input_tokens.clone())); + properties.push((GEN_AI_USAGE_INPUT_TOKENS.into(), input_tokens.clone())); properties.push(("llm.token_count.prompt".into(), input_tokens)); } if let Some(output_tokens) = usage.output_tokens { let output_tokens = output_tokens.to_string(); - properties.push(("gen_ai.usage.output_tokens".into(), output_tokens.clone())); + properties.push((GEN_AI_USAGE_OUTPUT_TOKENS.into(), output_tokens.clone())); properties.push(("llm.token_count.completion".into(), output_tokens)); }
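
Note (not part of the diff): the migration assumes each crate constant resolves to exactly the attribute key the old string literal emitted, since downstream dashboards and span queries match on the raw keys. A small drift-guard test along these lines could pin that down; the module and test names are hypothetical, and every expected string is taken verbatim from a removed line in the diff above.

```rust
// Hypothetical drift-guard, e.g. alongside src/proxy/utils/trace/span_attributes.rs.
// Each expected string is the literal key the pre-migration code pushed, so a
// semconv crate upgrade that changes a constant's value fails loudly here.
#[cfg(test)]
mod semconv_key_stability {
    use opentelemetry_semantic_conventions::attribute::{
        GEN_AI_OPERATION_NAME, GEN_AI_REQUEST_MODEL, GEN_AI_RESPONSE_FINISH_REASONS,
        GEN_AI_RESPONSE_ID, GEN_AI_RESPONSE_MODEL, GEN_AI_USAGE_INPUT_TOKENS,
        GEN_AI_USAGE_OUTPUT_TOKENS, SERVER_ADDRESS, SERVER_PORT, USER_ID,
    };

    #[test]
    fn constants_match_previously_hardcoded_keys() {
        assert_eq!(GEN_AI_OPERATION_NAME, "gen_ai.operation.name");
        assert_eq!(GEN_AI_REQUEST_MODEL, "gen_ai.request.model");
        assert_eq!(GEN_AI_RESPONSE_ID, "gen_ai.response.id");
        assert_eq!(GEN_AI_RESPONSE_MODEL, "gen_ai.response.model");
        assert_eq!(GEN_AI_RESPONSE_FINISH_REASONS, "gen_ai.response.finish_reasons");
        assert_eq!(GEN_AI_USAGE_INPUT_TOKENS, "gen_ai.usage.input_tokens");
        assert_eq!(GEN_AI_USAGE_OUTPUT_TOKENS, "gen_ai.usage.output_tokens");
        assert_eq!(SERVER_ADDRESS, "server.address");
        assert_eq!(SERVER_PORT, "server.port");
        assert_eq!(USER_ID, "user.id");
    }
}
```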