Skip to content

Commit

Permalink
prometheus: make sure http_requests_error_total and `http_requests_…
Browse files Browse the repository at this point in the history
…total` are incremented. (#1953)

fixes #1954


### prometheus: make sure http_requests_error_total and
http_requests_total are incremented. ([PR
#1953](#1953))

`http_requests_error_total` did only increment for requests that would
be an `INTERNAL_SERVER_ERROR` in the router (the service stack returning
a BoxError).
What this means is that validation errors would not increment this
counter.

`http_requests_total` would only increment for successful requests,
while the prometheus documentation mentions this key should be
incremented regardless of whether the request succeeded or not.

This PR makes sure we always increment `http_requests_total`, and we
increment `http_requests_error_total` when the StatusCode is not 2XX.
  • Loading branch information
o0Ignition0o committed Oct 20, 2022
1 parent 72da455 commit b33fe0a
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 13 deletions.
11 changes: 11 additions & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ Queries that passed 64 bit integers for Float values would (incorrectly) fail to

By [@o0Ignition0o](https://github.com/o0Ignition0o) in https://github.com/apollographql/router/pull/1951

### prometheus: make sure http_requests_error_total and http_requests_total are incremented. ([PR #1953](https://github.com/apollographql/router/pull/1953))

`http_requests_error_total` did only increment for requests that would be an `INTERNAL_SERVER_ERROR` in the router (the service stack returning a BoxError).
What this means is that validation errors would not increment this counter.

`http_requests_total` would only increment for successful requests, while the prometheus documentation mentions this key should be incremented regardless of whether the request succeeded or not.

This PR makes sure we always increment `http_requests_total`, and we increment `http_requests_error_total` when the StatusCode is not 2XX.

By [@o0Ignition0o](https://github.com/o0Ignition0o) in https://github.com/apollographql/router/pull/1953

### Set no_delay and keepalive on subgraph requests [Issue #1905](https://github.com/apollographql/router/issues/1905))

It was incorrectly removed in a previous pull request.
Expand Down
58 changes: 45 additions & 13 deletions apollo-router/src/plugins/telemetry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -765,11 +765,12 @@ impl Telemetry {
);

metric_attrs.extend(attributes.into_iter().map(|(k, v)| KeyValue::new(k, v)));
metrics.http_requests_total.add(1, &metric_attrs);
} else {
metrics.http_requests_total.add(1, &metric_attrs);
}

if !parts.status.is_success() {
metric_attrs.push(KeyValue::new("error", parts.status.to_string()));
metrics.http_requests_error_total.add(1, &metric_attrs);
}
let response = http::Response::from_parts(
parts,
once(ready(first_response.unwrap_or_default()))
Expand All @@ -779,12 +780,12 @@ impl Telemetry {

Ok(SupergraphResponse { context, response })
}
Err(err) => {
metrics.http_requests_error_total.add(1, &metric_attrs);

Err(err)
}
Err(err) => Err(err),
};

// http_requests_total - the total number of HTTP requests received
metrics.http_requests_total.add(1, &metric_attrs);

metrics
.http_requests_duration
.record(request_duration.as_secs_f64(), &metric_attrs);
Expand Down Expand Up @@ -907,14 +908,14 @@ impl Telemetry {
let errors = merge_config!(errors);

AttributesForwardConf {
insert: (!insert.is_empty()).then(|| insert),
insert: (!insert.is_empty()).then_some(insert),
request: (request.header.is_some() || request.body.is_some())
.then(|| request),
.then_some(request),
response: (response.header.is_some() || response.body.is_some())
.then(|| response),
.then_some(response),
errors: (errors.extensions.is_some() || errors.include_messages)
.then(|| errors),
context: (!context.is_empty()).then(|| context),
.then_some(errors),
context: (!context.is_empty()).then_some(context),
}
}),
)
Expand Down Expand Up @@ -1439,6 +1440,19 @@ mod tests {
.unwrap())
});

let mut mock_bad_request_service = MockSupergraphService::new();
mock_bad_request_service
.expect_call()
.times(1)
.returning(move |req: SupergraphRequest| {
Ok(SupergraphResponse::fake_builder()
.context(req.context)
.status_code(StatusCode::BAD_REQUEST)
.data(json!({"errors": [{"message": "nope"}]}))
.build()
.unwrap())
});

let mut mock_subgraph_service = MockSubgraphService::new();
mock_subgraph_service
.expect_call()
Expand Down Expand Up @@ -1589,6 +1603,21 @@ mod tests {
.await
.unwrap();

let mut bad_request_supergraph_service =
dyn_plugin.supergraph_service(BoxService::new(mock_bad_request_service));
let router_req = SupergraphRequest::fake_builder().header("test", "my_value_set");

let _router_response = bad_request_supergraph_service
.ready()
.await
.unwrap()
.call(router_req.build().unwrap())
.await
.unwrap()
.next_response()
.await
.unwrap();

let mut subgraph_service =
dyn_plugin.subgraph_service("my_subgraph_name", BoxService::new(mock_subgraph_service));
let subgraph_req = SubgraphRequest::fake_builder()
Expand Down Expand Up @@ -1683,5 +1712,8 @@ mod tests {
assert!(prom_metrics.contains(r#"http_request_duration_seconds_count{another_test="my_default_value",my_value="2",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="200",x_custom="coming_from_header"}"#));
assert!(prom_metrics.contains(r#"http_request_duration_seconds_sum{another_test="my_default_value",my_value="2",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="200",x_custom="coming_from_header"}"#));
assert!(prom_metrics.contains(r#"http_request_duration_seconds_bucket{error="INTERNAL_SERVER_ERROR",my_key="my_custom_attribute_from_context",query_from_request="query { test }",service_name="apollo-router",status="200",subgraph="my_subgraph_name",unknown_data="default_value",le="1"}"#));
assert!(prom_metrics.contains(r#"http_requests_total{error="INTERNAL_SERVER_ERROR",my_key="my_custom_attribute_from_context",query_from_request="query { test }",service_name="apollo-router",status="200",subgraph="my_subgraph_name",unknown_data="default_value"} 1"#));
assert!(prom_metrics.contains(r#"http_requests_total{another_test="my_default_value",error="400 Bad Request",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="400"} 1"#));
assert!(prom_metrics.contains(r#"http_requests_error_total{another_test="my_default_value",error="400 Bad Request",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="400"} 1"#))
}
}

0 comments on commit b33fe0a

Please sign in to comment.