diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b0b036877e..a530eb29711 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ - Feature flags of graduated features are now hard-coded in Relay so they can be removed from Sentry. ([#4076](https://github.com/getsentry/relay/pull/4076), [#4080](https://github.com/getsentry/relay/pull/4080)) - Add parallelization in Redis commands. ([#4118](https://github.com/getsentry/relay/pull/4118)) - Extract user ip for spans. ([#4144](https://github.com/getsentry/relay/pull/4144)) +- Add support for an experimental OTLP `/v1/traces/` endpoint. The endpoint is disabled by default. ([#4223](https://github.com/getsentry/relay/pull/4223)) ## 24.9.0 diff --git a/Cargo.lock b/Cargo.lock index 1d383f2a66f..43c63e63cc3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,6 +197,28 @@ dependencies = [ "zstd-safe", ] +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.77", +] + [[package]] name = "async-trait" version = "0.1.68" @@ -1551,7 +1573,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap", + "indexmap 2.2.5", "slab", "tokio", "tokio-util", @@ -1577,6 +1599,12 @@ dependencies = [ "byteorder", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.14.5" @@ -1593,7 +1621,7 @@ version = "0.9.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -1813,6 +1841,19 @@ dependencies = [ "tower-service", ] +[[package]] +name = "hyper-timeout" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -1899,6 +1940,16 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + [[package]] name = "indexmap" version = "2.2.5" @@ -1906,7 +1957,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -2191,7 +2242,7 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ee39891760e7d94734f6f63fedc29a2e4a152f836120753a72503f09fcf904" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", ] [[package]] @@ -2958,7 +3009,7 @@ checksum = "70c501afe3a2e25c9bd219aa56ec1e04cdb3fcdd763055be268778c13fa82c1f" dependencies = [ "autocfg", "equivalent", - "indexmap", + "indexmap 2.2.5", ] [[package]] @@ -3005,9 +3056,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2ecbe40f08db5c006b5764a2645f7f3f141ce756412ac9e1dd6087e6d32995" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" 
dependencies = [ "bytes", "prost-derive", @@ -3015,9 +3066,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acf0c195eebb4af52c752bec4f52f645da98b6e92077a04110c7f349477ae5ac" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", "itertools 0.13.0", @@ -3329,7 +3380,7 @@ version = "24.10.0" dependencies = [ "criterion", "hash32", - "hashbrown", + "hashbrown 0.14.5", "parking_lot", "relay-base-schema", "relay-common", @@ -3500,7 +3551,7 @@ dependencies = [ name = "relay-filter" version = "24.10.0" dependencies = [ - "indexmap", + "indexmap 2.2.5", "insta", "ipnetwork", "once_cell", @@ -3554,7 +3605,7 @@ dependencies = [ "criterion", "fnv", "hash32", - "hashbrown", + "hashbrown 0.14.5", "insta", "itertools 0.13.0", "priority-queue", @@ -3688,7 +3739,7 @@ dependencies = [ name = "relay-quotas" version = "24.10.0" dependencies = [ - "hashbrown", + "hashbrown 0.14.5", "insta", "itertools 0.13.0", "relay-base-schema", @@ -3770,7 +3821,7 @@ dependencies = [ "flate2", "fnv", "futures", - "hashbrown", + "hashbrown 0.14.5", "http", "hyper", "hyper-util", @@ -3786,6 +3837,7 @@ dependencies = [ "papaya", "pin-project-lite", "priority-queue", + "prost", "rand", "rayon", "regex", @@ -4430,7 +4482,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap", + "indexmap 2.2.5", "itoa", "ryu", "serde", @@ -4658,10 +4710,10 @@ dependencies = [ "futures-intrusive", "futures-io", "futures-util", - "hashbrown", + "hashbrown 0.14.5", "hashlink", "hex", - "indexmap", + "indexmap 2.2.5", "log", "memchr", "once_cell", @@ -5212,7 +5264,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap", + "indexmap 2.2.5", "toml_datetime", "winnow", ] @@ -5223,16 +5275,25 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ + "async-stream", "async-trait", + "axum", "base64 0.22.1", "bytes", + "h2", "http", "http-body", "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", "percent-encoding", "pin-project", "prost", + "socket2", + "tokio", "tokio-stream", + "tower", "tower-layer", "tower-service", "tracing", @@ -5246,9 +5307,13 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", + "indexmap 1.9.3", "pin-project", "pin-project-lite", + "rand", + "slab", "tokio", + "tokio-util", "tower-layer", "tower-service", "tracing", diff --git a/Cargo.toml b/Cargo.toml index 0626ba44f69..2842b9f5260 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -131,6 +131,7 @@ pin-project-lite = "0.2.12" pretty-hex = "0.4.1" priority-queue = "2.0.3" proc-macro2 = "1.0.8" +prost = "0.13.3" psl = "2.1.33" quote = "1.0.2" r2d2 = "0.8.10" diff --git a/relay-dynamic-config/src/feature.rs b/relay-dynamic-config/src/feature.rs index c79d775c195..2609729e3d2 100644 --- a/relay-dynamic-config/src/feature.rs +++ b/relay-dynamic-config/src/feature.rs @@ -56,7 +56,7 @@ pub enum Feature { /// Serialized as `organizations:standalone-span-ingestion`. #[serde(rename = "organizations:standalone-span-ingestion")] StandaloneSpanIngestion, - /// Enable standalone span ingestion via the `/spans/` OTel endpoint. + /// Enable standalone span ingestion via the `/traces/` OTel endpoint. /// /// Serialized as `projects:relay-otel-endpoint`. 
#[serde(rename = "projects:relay-otel-endpoint")] diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index 0e24b7a312e..6737f7a30ab 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -58,6 +58,7 @@ once_cell = { workspace = true } papaya = { workspace = true } pin-project-lite = { workspace = true } priority-queue = { workspace = true } +prost = { workspace = true } rand = { workspace = true } rayon = { workspace = true } regex = { workspace = true } diff --git a/relay-server/src/endpoints/mod.rs b/relay-server/src/endpoints/mod.rs index d590c529a13..a42e21ecc7d 100644 --- a/relay-server/src/endpoints/mod.rs +++ b/relay-server/src/endpoints/mod.rs @@ -19,6 +19,7 @@ mod public_keys; mod security_report; mod statics; mod store; +mod traces; mod unreal; use axum::extract::DefaultBodyLimit; @@ -74,6 +75,7 @@ pub fn routes(config: &Config) -> Router{ .route("/api/:project_id/minidump/", minidump::route(config)) .route("/api/:project_id/events/:event_id/attachments/", post(attachments::handle)) .route("/api/:project_id/unreal/:sentry_key/", unreal::route(config)) + .route("/api/:project_id/otlp/v1/traces/", traces::route(config)) // NOTE: If you add a new (non-experimental) route here, please also list it in // https://github.com/getsentry/sentry-docs/blob/master/docs/product/relay/operating-guidelines.mdx .route_layer(middlewares::cors()); diff --git a/relay-server/src/endpoints/traces.rs b/relay-server/src/endpoints/traces.rs new file mode 100644 index 00000000000..33ab60e24a9 --- /dev/null +++ b/relay-server/src/endpoints/traces.rs @@ -0,0 +1,43 @@ +use axum::extract::{DefaultBodyLimit, Request}; +use axum::http::StatusCode; +use axum::response::IntoResponse; +use axum::routing::{post, MethodRouter}; +use axum::RequestExt; +use bytes::Bytes; +use relay_config::Config; +use relay_dynamic_config::Feature; + +use crate::endpoints::common; +use crate::envelope::{ContentType, Envelope, Item, ItemType}; +use crate::extractors::{RawContentType, 
RequestMeta}; +use crate::service::ServiceState; + +async fn handle( + state: ServiceState, + content_type: RawContentType, + meta: RequestMeta, + request: Request, +) -> axum::response::Result { + let content_type @ (ContentType::Json | ContentType::Protobuf) = + ContentType::from(content_type.as_ref()) + else { + return Ok(StatusCode::UNSUPPORTED_MEDIA_TYPE); + }; + let payload: Bytes = request.extract().await?; + let mut envelope = Envelope::from_request(None, meta); + envelope.require_feature(Feature::OtelEndpoint); + + envelope.add_item({ + let mut item = Item::new(ItemType::OtelTracesData); + item.set_payload(content_type, payload); + item + }); + + common::handle_envelope(&state, envelope).await?; + + Ok(StatusCode::ACCEPTED) +} + +pub fn route(config: &Config) -> MethodRouter { + post(handle).route_layer(DefaultBodyLimit::max(config.max_envelope_size())) +} diff --git a/relay-server/src/envelope.rs b/relay-server/src/envelope.rs index 33fd88a3890..4a9eee87c4e 100644 --- a/relay-server/src/envelope.rs +++ b/relay-server/src/envelope.rs @@ -119,8 +119,10 @@ pub enum ItemType { CheckIn, /// A standalone span. Span, - /// A standalone OpenTelemetry span. + /// A standalone OpenTelemetry span serialized as JSON. OtelSpan, + /// An OTLP TracesData container. + OtelTracesData, /// UserReport as an Event UserReportV2, /// ProfileChunk is a chunk of a profiling session. 
@@ -174,6 +176,7 @@ impl ItemType { Self::CheckIn => "check_in", Self::Span => "span", Self::OtelSpan => "otel_span", + Self::OtelTracesData => "otel_traces_data", Self::ProfileChunk => "profile_chunk", Self::Unknown(_) => "unknown", } @@ -226,6 +229,7 @@ impl std::str::FromStr for ItemType { "check_in" => Self::CheckIn, "span" => Self::Span, "otel_span" => Self::OtelSpan, + "otel_traces_data" => Self::OtelTracesData, "profile_chunk" => Self::ProfileChunk, other => Self::Unknown(other.to_owned()), }) @@ -253,6 +257,8 @@ pub enum ContentType { Xml, /// application/x-sentry-envelope Envelope, + /// "application/x-protobuf" + Protobuf, /// Any arbitrary content type not listed explicitly. Other(String), } @@ -268,6 +274,7 @@ impl ContentType { Self::Minidump => "application/x-dmp", Self::Xml => "text/xml", Self::Envelope => CONTENT_TYPE, + Self::Protobuf => "application/x-protobuf", Self::Other(ref other) => other, } } @@ -289,6 +296,8 @@ impl ContentType { Some(Self::Xml) } else if ct.eq_ignore_ascii_case(Self::Envelope.as_str()) { Some(Self::Envelope) + } else if ct.eq_ignore_ascii_case(Self::Protobuf.as_str()) { + Some(Self::Protobuf) } else { None } @@ -664,13 +673,19 @@ impl Item { pub fn quantity(&self) -> usize { match self.ty() { ItemType::Attachment => self.len().max(1), + // NOTE: This is semantically wrong. An otel trace may contain many spans, + // but we cannot easily count these before converting the trace into a series of spans. + ItemType::OtelTracesData => 1, _ => 1, } } /// True if the item represents any kind of span. pub fn is_span(&self) -> bool { - matches!(self.ty(), ItemType::OtelSpan | ItemType::Span) + matches!( + self.ty(), + ItemType::OtelSpan | ItemType::Span | ItemType::OtelTracesData + ) } /// Returns the data category used for generating outcomes. 
@@ -696,6 +711,7 @@ impl Item { ItemType::ClientReport => None, ItemType::CheckIn => Some(DataCategory::Monitor), ItemType::Span | ItemType::OtelSpan => Some(DataCategory::Span), + ItemType::OtelTracesData => None, ItemType::ProfileChunk => Some(DataCategory::ProfileChunk), ItemType::Unknown(_) => None, } @@ -933,6 +949,7 @@ impl Item { | ItemType::CheckIn | ItemType::Span | ItemType::OtelSpan + | ItemType::OtelTracesData | ItemType::ProfileChunk => false, // The unknown item type can observe any behavior, most likely there are going to be no @@ -942,8 +959,6 @@ impl Item { } /// Determines whether the given item requires an event with identifier. - /// - /// This is true for all items except session health events. pub fn requires_event(&self) -> bool { match self.ty() { ItemType::Event => true, @@ -968,6 +983,7 @@ impl Item { ItemType::CheckIn => false, ItemType::Span => false, ItemType::OtelSpan => false, + ItemType::OtelTracesData => false, ItemType::ProfileChunk => false, // Since this Relay cannot interpret the semantics of this item, it does not know diff --git a/relay-server/src/services/outcome.rs b/relay-server/src/services/outcome.rs index 22ec72b3e76..54a7600d25e 100644 --- a/relay-server/src/services/outcome.rs +++ b/relay-server/src/services/outcome.rs @@ -399,6 +399,9 @@ pub enum DiscardReason { /// (Relay) Parsing the event JSON payload failed due to a syntax error. InvalidJson, + /// (Relay) Parsing an OTLP payload failed. + InvalidProtobuf, + /// (Relay) Parsing the event msgpack payload failed due to a syntax error. 
InvalidMsgpack, @@ -485,6 +488,7 @@ impl DiscardReason { DiscardReason::InvalidJson => "invalid_json", DiscardReason::InvalidMultipart => "invalid_multipart", DiscardReason::InvalidMsgpack => "invalid_msgpack", + DiscardReason::InvalidProtobuf => "invalid_proto", DiscardReason::InvalidTransaction => "invalid_transaction", DiscardReason::InvalidEnvelope => "invalid_envelope", DiscardReason::InvalidCompression => "invalid_compression", diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index bbe574bfdd5..5666f9b6b1e 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -297,8 +297,12 @@ impl ProcessingGroup { } // Extract spans. - let span_items = envelope - .take_items_by(|item| matches!(item.ty(), &ItemType::Span | &ItemType::OtelSpan)); + let span_items = envelope.take_items_by(|item| { + matches!( + item.ty(), + &ItemType::Span | &ItemType::OtelSpan | &ItemType::OtelTracesData + ) + }); if !span_items.is_empty() { grouped_envelopes.push(( ProcessingGroup::Span, @@ -1859,6 +1863,7 @@ impl EnvelopeProcessorService { state: &mut ProcessEnvelopeState, ) -> Result<(), ProcessingError> { span::filter(state); + span::convert_otel_traces_data(state); if_processing!(self.inner.config, { let global_config = self.inner.global_config.current(); diff --git a/relay-server/src/services/processor/event.rs b/relay-server/src/services/processor/event.rs index 9c072d33b1e..73ff5c1fabb 100644 --- a/relay-server/src/services/processor/event.rs +++ b/relay-server/src/services/processor/event.rs @@ -443,6 +443,7 @@ fn is_duplicate(item: &Item, processing_enabled: bool) -> bool { ItemType::CheckIn => false, ItemType::Span => false, ItemType::OtelSpan => false, + ItemType::OtelTracesData => false, ItemType::ProfileChunk => false, // Without knowing more, `Unknown` items are allowed to be repeated diff --git a/relay-server/src/services/processor/span.rs b/relay-server/src/services/processor/span.rs 
index 00eb6e8d85d..edd064314d0 100644 --- a/relay-server/src/services/processor/span.rs +++ b/relay-server/src/services/processor/span.rs @@ -1,11 +1,17 @@ //! Processor code related to standalone spans. +use prost::Message; use relay_dynamic_config::Feature; use relay_event_normalization::span::tag_extraction; use relay_event_schema::protocol::{Event, Span}; use relay_protocol::Annotated; +use relay_quotas::DataCategory; +use relay_spans::otel_trace::TracesData; +use crate::envelope::{ContentType, Item, ItemType}; +use crate::services::outcome::{DiscardReason, Outcome}; use crate::services::processor::SpanGroup; +use crate::utils::TypedEnvelope; use crate::{services::processor::ProcessEnvelopeState, utils::ItemAction}; #[cfg(feature = "processing")] @@ -15,16 +21,81 @@ pub use processing::*; pub fn filter(state: &mut ProcessEnvelopeState) { let disabled = state.should_filter(Feature::StandaloneSpanIngestion); + let otel_disabled = state.should_filter(Feature::OtelEndpoint); + state.managed_envelope.retain_items(|item| { - if item.is_span() && disabled { + if disabled && item.is_span() { relay_log::debug!("dropping span because feature is disabled"); ItemAction::DropSilently + } else if otel_disabled && item.ty() == &ItemType::OtelTracesData { + relay_log::debug!("dropping otel trace because feature is disabled"); + ItemAction::DropSilently } else { ItemAction::Keep } }); } +pub fn convert_otel_traces_data(state: &mut ProcessEnvelopeState) { + let envelope = state.managed_envelope.envelope_mut(); + + for item in envelope.take_items_by(|item| item.ty() == &ItemType::OtelTracesData) { + convert_traces_data(item, &mut state.managed_envelope); + } +} + +fn convert_traces_data(item: Item, managed_envelope: &mut TypedEnvelope) { + let traces_data = match parse_traces_data(item) { + Ok(traces_data) => traces_data, + Err(reason) => { + // NOTE: logging quantity=1 is semantically wrong, but we cannot know the real quantity + // without parsing. 
+ track_invalid(managed_envelope, reason); + return; + } + }; + for resource in traces_data.resource_spans { + for scope in resource.scope_spans { + for span in scope.spans { + // TODO: resources and scopes contain attributes, should denormalize into spans? + let Ok(payload) = serde_json::to_vec(&span) else { + track_invalid(managed_envelope, DiscardReason::Internal); + continue; + }; + let mut item = Item::new(ItemType::OtelSpan); + item.set_payload(ContentType::Json, payload); + managed_envelope.envelope_mut().add_item(item); + } + } + } + managed_envelope.update(); // update envelope summary +} + +fn track_invalid(managed_envelope: &mut TypedEnvelope, reason: DiscardReason) { + managed_envelope.track_outcome(Outcome::Invalid(reason), DataCategory::Span, 1); + managed_envelope.track_outcome(Outcome::Invalid(reason), DataCategory::SpanIndexed, 1); +} + +fn parse_traces_data(item: Item) -> Result { + match item.content_type() { + Some(&ContentType::Json) => serde_json::from_slice(&item.payload()).map_err(|e| { + relay_log::debug!( + error = &e as &dyn std::error::Error, + "Failed to parse traces data as JSON" + ); + DiscardReason::InvalidJson + }), + Some(&ContentType::Protobuf) => TracesData::decode(item.payload()).map_err(|e| { + relay_log::debug!( + error = &e as &dyn std::error::Error, + "Failed to parse traces data as protobuf" + ); + DiscardReason::InvalidProtobuf + }), + _ => Err(DiscardReason::ContentType), + } +} + /// Creates a span from the transaction and applies tag extraction on it. /// /// Returns `None` when [`tag_extraction::extract_span_tags`] clears the span, which it shouldn't. 
diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index f6bc2efc4a6..4ce58d0dfb8 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -10,14 +10,11 @@ use relay_dynamic_config::{ }; use relay_event_normalization::span::ai::extract_ai_measurements; use relay_event_normalization::{ - normalize_measurements, normalize_performance_score, span::tag_extraction, validate_span, - CombinedMeasurementsConfig, MeasurementsConfig, PerformanceScoreConfig, RawUserAgentInfo, - TransactionsProcessor, -}; -use relay_event_normalization::{ - normalize_transaction_name, BorrowedSpanOpDefaults, ClientHints, FromUserAgentInfo, - GeoIpLookup, ModelCosts, SchemaProcessor, TimestampProcessor, TransactionNameRule, - TrimmingProcessor, + normalize_measurements, normalize_performance_score, normalize_transaction_name, + span::tag_extraction, validate_span, BorrowedSpanOpDefaults, ClientHints, + CombinedMeasurementsConfig, FromUserAgentInfo, GeoIpLookup, MeasurementsConfig, ModelCosts, + PerformanceScoreConfig, RawUserAgentInfo, SchemaProcessor, TimestampProcessor, + TransactionNameRule, TransactionsProcessor, TrimmingProcessor, }; use relay_event_schema::processor::{process_value, ProcessingAction, ProcessingState}; use relay_event_schema::protocol::{ diff --git a/relay-server/src/utils/rate_limits.rs b/relay-server/src/utils/rate_limits.rs index cbeb7f5fc28..3dd63fd50fc 100644 --- a/relay-server/src/utils/rate_limits.rs +++ b/relay-server/src/utils/rate_limits.rs @@ -130,6 +130,7 @@ fn infer_event_category(item: &Item) -> Option { ItemType::CheckIn => None, ItemType::Span => None, ItemType::OtelSpan => None, + ItemType::OtelTracesData => None, ItemType::ProfileChunk => Some(DataCategory::ProfileChunk), ItemType::Unknown(_) => None, } @@ -232,6 +233,7 @@ impl EnvelopeSummary { ItemType::ReplayRecording => &mut self.replay_quantity, 
ItemType::ReplayVideo => &mut self.replay_quantity, ItemType::CheckIn => &mut self.checkin_quantity, + ItemType::OtelTracesData => &mut self.span_quantity, ItemType::OtelSpan => &mut self.span_quantity, ItemType::Span => &mut self.span_quantity, ItemType::ProfileChunk => &mut self.profile_chunk_quantity, @@ -478,8 +480,9 @@ impl Enforcement { ItemType::ReplayVideo => !self.replays.is_active(), ItemType::ReplayRecording => !self.replays.is_active(), ItemType::CheckIn => !self.check_ins.is_active(), - ItemType::Span => !self.spans_indexed.is_active(), - ItemType::OtelSpan => !self.spans_indexed.is_active(), + ItemType::Span | ItemType::OtelSpan | ItemType::OtelTracesData => { + !self.spans_indexed.is_active() + } ItemType::Event | ItemType::Transaction | ItemType::Security diff --git a/relay-server/src/utils/sizes.rs b/relay-server/src/utils/sizes.rs index 4bb4b9d88dc..6c03420c2df 100644 --- a/relay-server/src/utils/sizes.rs +++ b/relay-server/src/utils/sizes.rs @@ -62,6 +62,7 @@ pub fn check_envelope_size_limits(config: &Config, envelope: &Envelope) -> Resul ItemType::Statsd => config.max_statsd_size(), ItemType::MetricBuckets => config.max_metric_buckets_size(), ItemType::Span | ItemType::OtelSpan => config.max_span_size(), + ItemType::OtelTracesData => config.max_event_size(), // a spans container similar to `Transaction` ItemType::ProfileChunk => config.max_profile_size(), ItemType::Unknown(_) => NO_LIMIT, }; diff --git a/relay-spans/Cargo.toml b/relay-spans/Cargo.toml index 807b2e9acfb..ad2928c1800 100644 --- a/relay-spans/Cargo.toml +++ b/relay-spans/Cargo.toml @@ -17,13 +17,13 @@ chrono = { workspace = true } hex = { workspace = true } once_cell = { workspace = true } opentelemetry-proto = { workspace = true, features = [ - "gen-tonic-messages", + "gen-tonic", "with-serde", "trace", ] } relay-event-schema = { workspace = true } relay-protocol = { workspace = true } +serde_json = { workspace = true } [dev-dependencies] insta = { workspace = true } -serde_json = 
{ workspace = true } diff --git a/relay-spans/src/span.rs b/relay-spans/src/span.rs index 30b03f975fe..db0375226e1 100644 --- a/relay-spans/src/span.rs +++ b/relay-spans/src/span.rs @@ -3,7 +3,6 @@ use std::str::FromStr; use chrono::{TimeZone, Utc}; use opentelemetry_proto::tonic::common::v1::any_value::Value as OtelValue; use opentelemetry_proto::tonic::common::v1::{AnyValue, KeyValue}; -use relay_protocol::{Annotated, FromValue, Object}; use crate::otel_trace::{status::StatusCode as OtelStatusCode, Span as OtelSpan}; use crate::status_codes; @@ -11,6 +10,7 @@ use relay_event_schema::protocol::{ EventId, MetricSummary, MetricsSummary, Span as EventSpan, SpanData, SpanId, SpanStatus, Timestamp, TraceId, }; +use relay_protocol::{Annotated, FromValue, Object}; /// convert_from_otel_to_sentry_status returns a status as defined by Sentry based on the OTel status. fn convert_from_otel_to_sentry_status( diff --git a/requirements-dev.txt b/requirements-dev.txt index 437344747b6..a413483a119 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,6 +11,7 @@ flake8==7.0.0 confluent-kafka==2.1.1 flask==3.0.3 msgpack==1.0.7 +opentelemetry-proto==1.22.0 pytest-localserver==0.8.1 pytest-sentry==0.3.0 pytest-xdist==3.5.0 diff --git a/tests/integration/fixtures/__init__.py b/tests/integration/fixtures/__init__.py index 71685da3229..10e9f68ae15 100644 --- a/tests/integration/fixtures/__init__.py +++ b/tests/integration/fixtures/__init__.py @@ -203,6 +203,33 @@ def send_nel_event( return + def send_otel_span( + self, + project_id, + json=None, + bytes=None, + headers=None, + dsn_key_idx=0, + dsn_key=None, + ): + + if dsn_key is None: + dsn_key = self.get_dsn_public_key(project_id, dsn_key_idx) + + url = f"/api/{project_id}/otlp/v1/traces/?sentry_key={dsn_key}" + + if json: + headers = { + "Content-Type": "application/json", + **(headers or {}), + } + + response = self.post(url, headers=headers, json=json) + else: + response = self.post(url, headers=headers, data=bytes) 
+ + response.raise_for_status() + def send_options(self, project_id, headers=None, dsn_key_idx=0): headers = { "X-Sentry-Auth": self.get_auth_header(project_id, dsn_key_idx), diff --git a/tests/integration/test_spans.py b/tests/integration/test_spans.py index 5a1b806c1de..039417c4891 100644 --- a/tests/integration/test_spans.py +++ b/tests/integration/test_spans.py @@ -5,9 +5,18 @@ from datetime import UTC, datetime, timedelta, timezone import pytest +from opentelemetry.proto.common.v1.common_pb2 import AnyValue, KeyValue +from opentelemetry.proto.trace.v1.trace_pb2 import ( + ResourceSpans, + ScopeSpans, + Span, + TracesData, +) from requests import HTTPError +from sentry_relay.consts import DataCategory from sentry_sdk.envelope import Envelope, Item, PayloadRef +from .asserts import time_after, time_within_delta from .consts import ( METRICS_EXTRACTION_MIN_SUPPORTED_VERSION, TRANSACTION_EXTRACT_MIN_SUPPORTED_VERSION, @@ -461,6 +470,558 @@ def make_otel_span(start, end): } +def test_span_ingestion( + mini_sentry, + relay_with_processing, + spans_consumer, + metrics_consumer, +): + spans_consumer = spans_consumer() + metrics_consumer = metrics_consumer() + + relay = relay_with_processing( + options={ + "aggregator": { + "bucket_interval": 1, + "initial_delay": 0, + "max_secs_in_past": 2**64 - 1, + } + } + ) + project_id = 42 + project_config = mini_sentry.add_full_project_config(project_id) + project_config["config"]["features"] = [ + "organizations:standalone-span-ingestion", + "projects:span-metrics-extraction", + "projects:relay-otel-endpoint", + ] + project_config["config"]["transactionMetrics"] = { + "version": TRANSACTION_EXTRACT_MIN_SUPPORTED_VERSION + } + + duration = timedelta(milliseconds=500) + now = datetime.now(timezone.utc) + end = now - timedelta(seconds=1) + start = end - duration + + # 1 - Send OTel span and sentry span via envelope + envelope = envelope_with_spans(start, end) + relay.send_envelope( + project_id, + envelope, + headers={ # Set 
browser header to verify that `d:transactions/measurements.score.total@ratio` is extracted only once. + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" + }, + ) + + # 2 - Send OTel json span via endpoint + relay.send_otel_span( + project_id, + json=make_otel_span(start, end), + ) + + protobuf_span = Span( + trace_id=bytes.fromhex("89143b0763095bd9c9955e8175d1fb24"), + span_id=bytes.fromhex("f0b809703e783d00"), + parent_span_id=bytes.fromhex("f0f0f0abcdef1234"), + name="my 3rd protobuf OTel span", + start_time_unix_nano=int(start.timestamp() * 1e9), + end_time_unix_nano=int(end.timestamp() * 1e9), + attributes=[ + KeyValue( + key="sentry.exclusive_time_nano", + value=AnyValue(int_value=int(duration.total_seconds() * 1e9)), + ), + ], + ) + scope_spans = ScopeSpans(spans=[protobuf_span]) + resource_spans = ResourceSpans(scope_spans=[scope_spans]) + traces_data = TracesData(resource_spans=[resource_spans]) + protobuf_payload = traces_data.SerializeToString() + + # 3 - Send OTel protobuf span via endpoint + relay.send_otel_span( + project_id, + bytes=protobuf_payload, + headers={"Content-Type": "application/x-protobuf"}, + ) + + spans = spans_consumer.get_spans(timeout=10.0, n=6) + + for span in spans: + span.pop("received", None) + + # endpoint might overtake envelope + spans.sort(key=lambda msg: msg["span_id"]) + + assert spans == [ + { + "data": { + "browser.name": "Chrome", + "client.address": "127.0.0.1", + "user_agent.original": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/111.0.0.0 Safari/537.36", + }, + "description": "my 1st OTel span", + "duration_ms": 500, + "exclusive_time_ms": 500.0, + "is_segment": True, + "organization_id": 1, + "project_id": 42, + "retention_days": 90, + "segment_id": "a342abb1214ca181", + "sentry_tags": { + "browser.name": "Chrome", + "category": "db", + "op": "db.query", + "status": "unknown", + }, + 
"span_id": "a342abb1214ca181", + "start_timestamp_ms": int(start.timestamp() * 1e3), + "start_timestamp_precise": start.timestamp(), + "end_timestamp_precise": end.timestamp(), + "trace_id": "89143b0763095bd9c9955e8175d1fb23", + }, + { + "data": { + "browser.name": "Chrome", + "client.address": "127.0.0.1", + "user_agent.original": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/111.0.0.0 Safari/537.36", + }, + "description": "https://example.com/p/blah.js", + "duration_ms": 1500, + "exclusive_time_ms": 345.0, + "is_segment": True, + "measurements": {"score.total": {"value": 0.12121616}}, + "organization_id": 1, + "project_id": 42, + "retention_days": 90, + "segment_id": "b0429c44b67a3eb1", + "sentry_tags": { + "browser.name": "Chrome", + "category": "resource", + "description": "https://example.com/*/blah.js", + "domain": "example.com", + "file_extension": "js", + "group": "8a97a9e43588e2bd", + "op": "resource.script", + }, + "span_id": "b0429c44b67a3eb1", + "start_timestamp_ms": int(start.timestamp() * 1e3), + "start_timestamp_precise": start.timestamp(), + "end_timestamp_precise": end.timestamp() + 1, + "trace_id": "ff62a8b040f340bda5d830223def1d81", + }, + { + "data": { + "browser.name": "Chrome", + "client.address": "127.0.0.1", + "user_agent.original": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/111.0.0.0 Safari/537.36", + }, + "description": r"test \" with \" escaped \" chars", + "duration_ms": 1500, + "exclusive_time_ms": 345.0, + "is_segment": False, + "organization_id": 1, + "project_id": 42, + "retention_days": 90, + "segment_id": "968cff94913ebb07", + "sentry_tags": {"browser.name": "Chrome", "op": "default"}, + "span_id": "cd429c44b67a3eb1", + "start_timestamp_ms": int(start.timestamp() * 1e3), + "start_timestamp_precise": start.timestamp(), + "end_timestamp_precise": end.timestamp() + 1, + "trace_id": "ff62a8b040f340bda5d830223def1d81", + }, + { + 
"data": { + "browser.name": "Python Requests", + "client.address": "127.0.0.1", + "user_agent.original": "python-requests/2.32.2", + }, + "description": "my 2nd OTel span", + "duration_ms": 500, + "exclusive_time_ms": 500.0, + "is_segment": True, + "organization_id": 1, + "project_id": 42, + "retention_days": 90, + "segment_id": "d342abb1214ca182", + "sentry_tags": { + "browser.name": "Python Requests", + "op": "default", + "status": "unknown", + }, + "span_id": "d342abb1214ca182", + "start_timestamp_ms": int(start.timestamp() * 1e3), + "start_timestamp_precise": start.timestamp(), + "end_timestamp_precise": end.timestamp(), + "trace_id": "89143b0763095bd9c9955e8175d1fb24", + }, + { + "data": { + "browser.name": "Chrome", + "client.address": "127.0.0.1", + "user_agent.original": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/111.0.0.0 Safari/537.36", + }, + "duration_ms": 1500, + "exclusive_time_ms": 345.0, + "is_segment": False, + "organization_id": 1, + "project_id": 42, + "retention_days": 90, + "segment_id": "968cff94913ebb07", + "sentry_tags": { + "browser.name": "Chrome", + "op": "default", + }, + "span_id": "ed429c44b67a3eb1", + "start_timestamp_ms": int(start.timestamp() * 1e3), + "start_timestamp_precise": start.timestamp(), + "end_timestamp_precise": end.timestamp() + 1, + "trace_id": "ff62a8b040f340bda5d830223def1d81", + }, + { + "data": { + "browser.name": "Python Requests", + "client.address": "127.0.0.1", + "user_agent.original": "python-requests/2.32.2", + }, + "description": "my 3rd protobuf OTel span", + "duration_ms": 500, + "exclusive_time_ms": 500.0, + "is_segment": False, + "organization_id": 1, + "parent_span_id": "f0f0f0abcdef1234", + "project_id": 42, + "retention_days": 90, + "sentry_tags": { + "browser.name": "Python Requests", + "op": "default", + "status": "unknown", + }, + "span_id": "f0b809703e783d00", + "start_timestamp_ms": int(start.timestamp() * 1e3), + "start_timestamp_precise": 
start.timestamp(), + "end_timestamp_precise": end.timestamp(), + "trace_id": "89143b0763095bd9c9955e8175d1fb24", + }, + ] + + spans_consumer.assert_empty() + + metrics = [metric for (metric, _headers) in metrics_consumer.get_metrics()] + metrics.sort(key=lambda m: (m["name"], sorted(m["tags"].items()), m["timestamp"])) + for metric in metrics: + try: + metric["value"].sort() + except AttributeError: + pass + + now_timestamp = int(now.timestamp()) + expected_timestamp = int(end.timestamp()) + expected_span_metrics = [ + { + "name": "c:spans/count_per_root_project@none", + "org_id": 1, + "project_id": 42, + "received_at": time_after(now_timestamp), + "retention_days": 90, + "tags": {"decision": "keep", "target_project_id": "42"}, + "timestamp": expected_timestamp, + "type": "c", + "value": 3.0, + }, + { + "name": "c:spans/count_per_root_project@none", + "org_id": 1, + "project_id": 42, + "received_at": time_after(now_timestamp), + "retention_days": 90, + "tags": {"decision": "keep", "target_project_id": "42"}, + "timestamp": expected_timestamp + 1, + "type": "c", + "value": 3.0, + }, + { + "name": "c:spans/usage@none", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": {}, + "timestamp": expected_timestamp, + "type": "c", + "value": 3.0, + "received_at": time_after(now_timestamp), + }, + { + "name": "c:spans/usage@none", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": {}, + "timestamp": expected_timestamp + 1, + "type": "c", + "value": 3.0, + "received_at": time_after(now_timestamp), + }, + { + "name": "d:spans/duration@millisecond", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": { + "file_extension": "js", + "span.category": "resource", + "span.description": "https://example.com/*/blah.js", + "span.domain": "example.com", + "span.group": "8a97a9e43588e2bd", + "span.op": "resource.script", + }, + "timestamp": expected_timestamp + 1, + "type": "d", + "value": [1500.0], + "received_at": 
time_after(now_timestamp), + }, + { + "name": "d:spans/duration@millisecond", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": { + "span.category": "db", + "span.op": "db.query", + }, + "timestamp": expected_timestamp, + "type": "d", + "value": [500.0], + "received_at": time_after(now_timestamp), + }, + { + "name": "d:spans/duration@millisecond", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": { + "span.op": "default", + }, + "timestamp": expected_timestamp, + "type": "d", + "value": [500.0, 500.0], + "received_at": time_after(now_timestamp), + }, + { + "name": "d:spans/duration@millisecond", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": {"span.op": "default"}, + "timestamp": expected_timestamp + 1, + "type": "d", + "value": [1500.0, 1500.0], + "received_at": time_after(now_timestamp), + }, + { + "name": "d:spans/duration_light@millisecond", + "org_id": 1, + "project_id": 42, + "received_at": time_after(now_timestamp), + "retention_days": 90, + "tags": { + "file_extension": "js", + "span.category": "resource", + "span.description": "https://example.com/*/blah.js", + "span.domain": "example.com", + "span.group": "8a97a9e43588e2bd", + "span.op": "resource.script", + }, + "timestamp": expected_timestamp + 1, + "type": "d", + "value": [1500.0], + }, + { + "name": "d:spans/duration_light@millisecond", + "org_id": 1, + "project_id": 42, + "received_at": time_after(now_timestamp), + "retention_days": 90, + "tags": {"span.category": "db", "span.op": "db.query"}, + "timestamp": expected_timestamp, + "type": "d", + "value": [500.0], + }, + { + "org_id": 1, + "project_id": 42, + "name": "d:spans/exclusive_time@millisecond", + "type": "d", + "value": [345.0], + "timestamp": expected_timestamp + 1, + "tags": { + "file_extension": "js", + "span.category": "resource", + "span.description": "https://example.com/*/blah.js", + "span.domain": "example.com", + "span.group": "8a97a9e43588e2bd", + "span.op": 
"resource.script", + }, + "retention_days": 90, + "received_at": time_after(now_timestamp), + }, + { + "org_id": 1, + "project_id": 42, + "name": "d:spans/exclusive_time@millisecond", + "retention_days": 90, + "tags": {"span.category": "db", "span.op": "db.query"}, + "timestamp": expected_timestamp, + "type": "d", + "value": [500.0], + "received_at": time_after(now_timestamp), + }, + { + "name": "d:spans/exclusive_time@millisecond", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": {"span.op": "default"}, + "timestamp": expected_timestamp, + "type": "d", + "value": [500.0, 500.0], + "received_at": time_after(now_timestamp), + }, + { + "name": "d:spans/exclusive_time@millisecond", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": {"span.op": "default"}, + "timestamp": expected_timestamp + 1, + "type": "d", + "value": [345.0, 345.0], + "received_at": time_after(now_timestamp), + }, + { + "org_id": 1, + "project_id": 42, + "name": "d:spans/exclusive_time_light@millisecond", + "type": "d", + "value": [345.0], + "timestamp": expected_timestamp + 1, + "tags": { + "file_extension": "js", + "span.category": "resource", + "span.description": "https://example.com/*/blah.js", + "span.domain": "example.com", + "span.group": "8a97a9e43588e2bd", + "span.op": "resource.script", + }, + "retention_days": 90, + "received_at": time_after(now_timestamp), + }, + { + "name": "d:spans/exclusive_time_light@millisecond", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": {"span.category": "db", "span.op": "db.query"}, + "timestamp": expected_timestamp, + "type": "d", + "value": [500.0], + "received_at": time_after(now_timestamp), + }, + { + "name": "d:spans/webvital.score.total@ratio", + "org_id": 1, + "project_id": 42, + "retention_days": 90, + "tags": {"span.op": "resource.script"}, + "timestamp": expected_timestamp + 1, + "type": "d", + "value": [0.12121616], + "received_at": time_after(now_timestamp), + }, + ] + assert [m for m in 
metrics if ":spans/" in m["name"]] == expected_span_metrics + + # Regardless of whether transactions are extracted, score.total is only converted to a transaction metric once: + score_total_metrics = [ + m + for m in metrics + if m["name"] == "d:transactions/measurements.score.total@ratio" + ] + assert len(score_total_metrics) == 1, score_total_metrics + assert len(score_total_metrics[0]["value"]) == 1 + + metrics_consumer.assert_empty() + + +def test_otel_endpoint_disabled(mini_sentry, relay): + relay = relay( + mini_sentry, + { + "outcomes": { + "emit_outcomes": True, + "batch_size": 1, + "batch_interval": 1, + "source": "relay", + } + }, + ) + project_id = 42 + project_config = mini_sentry.add_full_project_config(project_id)["config"] + project_config["features"] = ["organizations:standalone-span-ingestion"] + + end = datetime.now(timezone.utc) - timedelta(seconds=1) + start = end - timedelta(milliseconds=500) + relay.send_otel_span( + project_id, + json=make_otel_span(start, end), + ) + + outcomes = [] + for _ in range(2): + outcomes.extend(mini_sentry.captured_outcomes.get(timeout=3).get("outcomes")) + outcomes.sort(key=lambda x: x["category"]) + + assert outcomes == [ + { + "org_id": 1, + "key_id": 123, + "project_id": 42, + "outcome": 3, + "reason": "feature_disabled", + "category": category.value, + "quantity": 1, + "source": "relay", + "timestamp": time_within_delta(), + } + for category in [DataCategory.SPAN, DataCategory.SPAN_INDEXED] + ] + + # Second attempt will cause a 403 response: + with pytest.raises(HTTPError) as exc_info: + relay.send_otel_span( + project_id, + json=make_otel_span(start, end), + ) + response = exc_info.value.response + assert response.status_code == 403 + assert response.json() == { + "detail": "event submission rejected with_reason: FeatureDisabled(OtelEndpoint)" + } + + # No envelopes were received: + assert mini_sentry.captured_events.empty() + + def test_span_extraction_with_metrics_summary( mini_sentry, relay_with_processing,