Skip to content

Commit

Permalink
Merge pull request #2475 from calebschoepp/otel-metrics-via-tracing
Browse files Browse the repository at this point in the history
Taking a first crack at implementing metrics
  • Loading branch information
calebschoepp committed Apr 29, 2024
2 parents 7d66e91 + 1b0f40d commit 72cc025
Show file tree
Hide file tree
Showing 10 changed files with 252 additions and 67 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/telemetry/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ tracing-appender = "0.2.2"
tracing-opentelemetry = "0.23.0"
tracing-subscriber = { version = "0.3.17", features = ["env-filter", "json", "registry"] }
url = "2.2.2"
terminal = { path = "../terminal" }
78 changes: 65 additions & 13 deletions crates/telemetry/src/env.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,44 @@
/// Returns a boolean indicating if the OTEL layer should be enabled.
use std::env::VarError;

use opentelemetry_otlp::{
OTEL_EXPORTER_OTLP_ENDPOINT, OTEL_EXPORTER_OTLP_METRICS_ENDPOINT, OTEL_EXPORTER_OTLP_PROTOCOL,
OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
};

// Name of the variable checked by `otel_sdk_disabled`; set and non-empty means disabled.
const OTEL_SDK_DISABLED: &str = "OTEL_SDK_DISABLED";
// Signal-specific protocol variable for traces; passed as the preferred value ahead of
// `OTEL_EXPORTER_OTLP_PROTOCOL` when resolving the traces protocol.
const OTEL_EXPORTER_OTLP_TRACES_PROTOCOL: &str = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL";
// Signal-specific protocol variable for metrics; passed as the preferred value ahead of
// `OTEL_EXPORTER_OTLP_PROTOCOL` when resolving the metrics protocol.
const OTEL_EXPORTER_OTLP_METRICS_PROTOCOL: &str = "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL";

/// Reports whether the OTel tracing layer should be built.
///
/// Tracing counts as enabled when at least one of these environment variables is set to a
/// non-empty value:
/// - `OTEL_EXPORTER_OTLP_ENDPOINT`
/// - `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`
///
/// A set, non-empty `OTEL_SDK_DISABLED` overrides the above and turns tracing off.
pub(crate) fn otel_tracing_enabled() -> bool {
    let endpoint_configured = any_vars_set(&[
        OTEL_EXPORTER_OTLP_ENDPOINT,
        OTEL_EXPORTER_OTLP_TRACES_ENDPOINT,
    ]);

    // The kill switch is only consulted once an endpoint is known to be configured.
    endpoint_configured && !otel_sdk_disabled()
}

/// Returns a boolean indicating if the OTEL metrics layer should be enabled.
///
/// It is considered enabled if any of the following environment variables are set and not empty:
/// - `OTEL_EXPORTER_OTLP_ENDPOINT`
/// - `OTEL_EXPORTER_OTLP_METRICS_ENDPOINT`
///
/// Note that this is overridden if OTEL_SDK_DISABLED is set and not empty.
pub(crate) fn otel_enabled() -> bool {
const ENABLING_VARS: &[&str] = &[
"OTEL_EXPORTER_OTLP_ENDPOINT",
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT",
"OTEL_EXPORTER_OTLP_METRICS_ENDPOINT",
];
ENABLING_VARS
pub(crate) fn otel_metrics_enabled() -> bool {
any_vars_set(&[
OTEL_EXPORTER_OTLP_ENDPOINT,
OTEL_EXPORTER_OTLP_METRICS_ENDPOINT,
]) && !otel_sdk_disabled()
}

/// Tells whether at least one of the named environment variables holds a non-empty value.
///
/// Variables that are unset, or set to the empty string, do not count. An empty list of
/// names yields `false`.
fn any_vars_set(enabling_vars: &[&str]) -> bool {
    for name in enabling_vars {
        if let Some(value) = std::env::var_os(name) {
            if !value.is_empty() {
                // First non-empty hit decides the answer; later names are not inspected.
                return true;
            }
        }
    }
    false
}
Expand All @@ -21,7 +47,7 @@ pub(crate) fn otel_enabled() -> bool {
///
/// It is considered disabled if the environment variable `OTEL_SDK_DISABLED` is set and not empty.
pub(crate) fn otel_sdk_disabled() -> bool {
std::env::var_os("OTEL_SDK_DISABLED").is_some_and(|val| !val.is_empty())
std::env::var_os(OTEL_SDK_DISABLED).is_some_and(|val| !val.is_empty())
}

/// The protocol to use for OTLP exporter.
Expand All @@ -34,15 +60,41 @@ pub(crate) enum OtlpProtocol {
impl OtlpProtocol {
/// Returns the protocol to be used for exporting traces as defined by the environment.
///
/// Reads `OTEL_EXPORTER_OTLP_TRACES_PROTOCOL` and `OTEL_EXPORTER_OTLP_PROTOCOL` and hands both
/// lookups to `protocol_from_env`, with the traces-specific value passed as the preferred one.
pub(crate) fn traces_protocol_from_env() -> Self {
    Self::protocol_from_env(
        std::env::var(OTEL_EXPORTER_OTLP_TRACES_PROTOCOL),
        std::env::var(OTEL_EXPORTER_OTLP_PROTOCOL),
    )
}

/// Determines, from the environment, which OTLP protocol metrics should be exported with.
///
/// Reads `OTEL_EXPORTER_OTLP_METRICS_PROTOCOL` and `OTEL_EXPORTER_OTLP_PROTOCOL` and hands both
/// lookups to `protocol_from_env`, with the metrics-specific value passed as the preferred one.
pub(crate) fn metrics_protocol_from_env() -> Self {
    let metrics_specific = std::env::var(OTEL_EXPORTER_OTLP_METRICS_PROTOCOL);
    let general = std::env::var(OTEL_EXPORTER_OTLP_PROTOCOL);
    Self::protocol_from_env(metrics_specific, general)
}

/// Resolves the OTLP protocol from a signal-specific and a general environment lookup.
///
/// The signal-specific value is used when its lookup succeeded; otherwise the general value is
/// used; if neither lookup succeeded the protocol defaults to `http/protobuf`.
///
/// A value other than `grpc`, `http/protobuf` or `http/json` also resolves to
/// `http/protobuf`. In that case a warning is printed, but only the first time this happens:
/// the guard is a single static shared by every caller of this function.
fn protocol_from_env(
    specific_protocol: Result<String, VarError>,
    general_protocol: Result<String, VarError>,
) -> Self {
    static WARN_ONCE: std::sync::Once = std::sync::Once::new();

    // Only allocate the default string when both lookups failed.
    let protocol = specific_protocol
        .or(general_protocol)
        .unwrap_or_else(|_| "http/protobuf".to_string());

    match protocol.as_str() {
        "grpc" => Self::Grpc,
        "http/protobuf" => Self::HttpProtobuf,
        "http/json" => Self::HttpJson,
        s => {
            WARN_ONCE.call_once(|| {
                terminal::warn!(
                    "'{s}' is not a valid OTLP protocol, defaulting to http/protobuf"
                );
            });
            Self::HttpProtobuf
        }
    }
}
}
56 changes: 45 additions & 11 deletions crates/telemetry/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use std::io::IsTerminal;

use env::otel_enabled;
use env::otel_sdk_disabled;
use env::otel_metrics_enabled;
use env::otel_tracing_enabled;
use opentelemetry_sdk::propagation::TraceContextPropagator;
use tracing_subscriber::{fmt, prelude::*, registry, EnvFilter, Layer};

pub mod detector;
mod env;
pub mod metrics;
mod propagation;
mod traces;

Expand All @@ -16,9 +17,34 @@ pub use propagation::inject_trace_context;
/// Initializes telemetry for Spin using the [tracing] library.
///
/// Under the hood this involves initializing a [tracing::Subscriber] with multiple [Layer]s. One
/// [Layer] emits [tracing] events to stderr, and another sends spans to an OTEL collector.
/// [Layer] emits [tracing] events to stderr, another sends spans to an OTel collector, and another
/// sends metrics to an OTel collector.
///
/// Configuration is pulled from the environment.
/// Configuration for the OTel layers is pulled from the environment.
///
/// Examples of emitting traces from Spin:
///
/// ```no_run
/// # use tracing::instrument;
/// # use tracing::Level;
/// #[instrument(name = "span_name", err(level = Level::INFO), fields(otel.name = "dynamically set name"))]
/// fn func_you_want_to_trace() -> anyhow::Result<String> {
/// Ok("Hello, world!".to_string())
/// }
/// ```
///
/// Some notes on tracing:
///
/// - If you don't want the span to be collected by default emit it at a trace or debug level.
/// - Make sure you `.in_current_span()` any spawned tasks so the span context is propagated.
/// - Use the otel.name attribute to dynamically set the span name.
/// - Use the err argument to have instrument automatically handle errors.
///
/// Examples of emitting metrics from Spin:
///
/// ```no_run
/// spin_telemetry::metrics::monotonic_counter!(spin.metric_name = 1, metric_attribute = "value");
/// ```
pub fn init(spin_version: String) -> anyhow::Result<ShutdownGuard> {
// This layer will print all tracing library log messages to stderr.
let fmt_layer = fmt::layer()
Expand All @@ -30,19 +56,27 @@ pub fn init(spin_version: String) -> anyhow::Result<ShutdownGuard> {
.add_directive("watchexec=off".parse()?),
);

// We only want to build the otel layer if the user passed some endpoint configuration and it wasn't explicitly disabled.
let build_otel_layer = !otel_sdk_disabled() && otel_enabled();
let otel_layer = if build_otel_layer {
// In this case we want to set the error handler to log errors to the tracing layer.
opentelemetry::global::set_error_handler(otel_error_handler)?;
// Even if metrics or tracing aren't enabled we're okay to turn on the global error handler
opentelemetry::global::set_error_handler(otel_error_handler)?;

let otel_tracing_layer = if otel_tracing_enabled() {
Some(traces::otel_tracing_layer(spin_version.clone())?)
} else {
None
};

Some(traces::otel_tracing_layer(spin_version)?)
let otel_metrics_layer = if otel_metrics_enabled() {
Some(metrics::otel_metrics_layer(spin_version)?)
} else {
None
};

// Build a registry subscriber with the layers we want to use.
registry().with(otel_layer).with(fmt_layer).init();
registry()
.with(otel_tracing_layer)
.with(otel_metrics_layer)
.with(fmt_layer)
.init();

// Used to propagate trace information in the standard W3C TraceContext format. Even if the otel
// layer is disabled we still want to propagate trace context.
Expand Down
121 changes: 121 additions & 0 deletions crates/telemetry/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
use std::time::Duration;

use anyhow::{bail, Result};
use opentelemetry_otlp::MetricsExporterBuilder;
use opentelemetry_sdk::{
metrics::{
reader::{DefaultAggregationSelector, DefaultTemporalitySelector},
PeriodicReader, SdkMeterProvider,
},
resource::{EnvResourceDetector, TelemetryResourceDetector},
runtime, Resource,
};
use tracing_opentelemetry::{MetricsLayer, OpenTelemetryLayer};
use tracing_subscriber::{filter::Filtered, layer::Layered, EnvFilter, Registry};

use crate::{detector::SpinResourceDetector, env::OtlpProtocol};

/// Builds the tracing-subscriber layer that forwards metrics to an OTEL collector.
///
/// OTEL configuration is read from the environment, using the variables defined
/// [here](https://opentelemetry.io/docs/specs/otel/protocol/exporter/) and
/// [here](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration).
///
/// Returns an error when the protocol resolved from the environment is `http/json`, or when
/// building the metrics exporter fails.
pub(crate) fn otel_metrics_layer(spin_version: String) -> Result<CustomMetricsLayer> {
    let detection_timeout = Duration::from_secs(5);
    let resource = Resource::from_detectors(
        detection_timeout,
        vec![
            // Set service.name from env OTEL_SERVICE_NAME > env OTEL_RESOURCE_ATTRIBUTES > spin
            // Set service.version from Spin metadata
            Box::new(SpinResourceDetector::new(spin_version)),
            // Sets fields from env OTEL_RESOURCE_ATTRIBUTES
            Box::new(EnvResourceDetector::new()),
            // Sets telemetry.sdk{name, language, version}
            Box::new(TelemetryResourceDetector),
        ],
    );

    // The exporter flavour follows the protocol resolved for metrics from the environment;
    // the exporter itself is configured from the OTEL_EXPORTER_* environment variables.
    let protocol = OtlpProtocol::metrics_protocol_from_env();
    let exporter_builder: MetricsExporterBuilder = match protocol {
        OtlpProtocol::HttpJson => bail!("http/json OTLP protocol is not supported"),
        OtlpProtocol::HttpProtobuf => opentelemetry_otlp::new_exporter().http().into(),
        OtlpProtocol::Grpc => opentelemetry_otlp::new_exporter().tonic().into(),
    };
    let exporter = exporter_builder.build_metrics_exporter(
        Box::new(DefaultTemporalitySelector::new()),
        Box::new(DefaultAggregationSelector::new()),
    )?;

    // Wire exporter -> reader (on the Tokio runtime) -> meter provider carrying the resource.
    let meter_provider = SdkMeterProvider::builder()
        .with_reader(PeriodicReader::builder(exporter, runtime::Tokio).build())
        .with_resource(resource)
        .build();

    Ok(MetricsLayer::new(meter_provider))
}

#[macro_export]
/// Records an increment to the counter with the given dotted name, tagged with the given
/// attributes.
///
/// Expands to a `tracing::trace!` event whose first field is the metric name prefixed with
/// `counter.`; any `key = value` pairs that follow are forwarded as further fields.
///
/// The increment may only be an i64 or f64. You must not mix types for the same metric.
///
/// ```no_run
/// # use spin_telemetry::metrics::counter;
/// counter!(spin.metric_name = 1, metric_attribute = "value");
/// ```
macro_rules! counter {
    (
        $name:ident $(. $segment:ident)* = $amount:expr
        $(, $attr_key:ident = $attr_value:expr)*
    ) => {
        tracing::trace!(
            counter.$name $(. $segment)* = $amount
            $(, $attr_key = $attr_value)*
        );
    }
}

#[macro_export]
/// Adds one more value to the distribution of the histogram with the given dotted name,
/// tagged with the given attributes.
///
/// Expands to a `tracing::trace!` event whose first field is the metric name prefixed with
/// `histogram.`; any `key = value` pairs that follow are forwarded as further fields.
///
/// The recorded value may only be an i64 or f64. You must not mix types for the same metric.
///
/// ```no_run
/// # use spin_telemetry::metrics::histogram;
/// histogram!(spin.metric_name = 1.5, metric_attribute = "value");
/// ```
macro_rules! histogram {
    (
        $name:ident $(. $segment:ident)* = $sample:expr
        $(, $attr_key:ident = $attr_value:expr)*
    ) => {
        tracing::trace!(
            histogram.$name $(. $segment)* = $sample
            $(, $attr_key = $attr_value)*
        );
    }
}

#[macro_export]
/// Records an increment to the monotonic counter with the given dotted name, tagged with the
/// given attributes.
///
/// Expands to a `tracing::trace!` event whose first field is the metric name prefixed with
/// `monotonic_counter.`; any `key = value` pairs that follow are forwarded as further fields.
///
/// The increment may only be a positive i64 or f64. You must not mix types for the same metric.
///
/// ```no_run
/// # use spin_telemetry::metrics::monotonic_counter;
/// monotonic_counter!(spin.metric_name = 1, metric_attribute = "value");
/// ```
macro_rules! monotonic_counter {
    (
        $name:ident $(. $segment:ident)* = $amount:expr
        $(, $attr_key:ident = $attr_value:expr)*
    ) => {
        tracing::trace!(
            monotonic_counter.$name $(. $segment)* = $amount
            $(, $attr_key = $attr_value)*
        );
    }
}

pub use counter;
pub use histogram;
pub use monotonic_counter;

/// This really large type alias is required to make the `registry().with()` pattern happy.
///
/// It names the concrete layer type returned by `otel_metrics_layer`.
///
/// NOTE(review): the nested subscriber parameter presumably has to mirror the layers that are
/// stacked onto the registry before the metrics layer (an optional, filtered OTel tracing
/// layer) — confirm against `init` whenever the layer order changes.
type CustomMetricsLayer = MetricsLayer<
Layered<
Option<
Filtered<
OpenTelemetryLayer<Registry, opentelemetry_sdk::trace::Tracer>,
EnvFilter,
Registry,
>,
>,
Registry,
>,
>;
Loading

0 comments on commit 72cc025

Please sign in to comment.