-
-
Notifications
You must be signed in to change notification settings - Fork 5
feat(taskbroker): Add Useful Push Taskbroker Metrics #595
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fbd2453
2ae4a59
115b8f0
a16f3bc
92209d7
2320bd4
768fa11
15cddde
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,6 +60,12 @@ def PushTask( | |
| context: grpc.ServicerContext, | ||
| ) -> PushTaskResponse: | ||
| """Handle incoming task activation.""" | ||
| start_time = time.monotonic() | ||
| self.worker._metrics.incr( | ||
| "taskworker.worker.push_rpc", | ||
| tags={"result": "attempt", "processing_pool": self.worker._processing_pool_name}, | ||
| ) | ||
|
|
||
| # Create `InflightTaskActivation` from the pushed task | ||
| inflight = InflightTaskActivation( | ||
| activation=request.task, | ||
|
|
@@ -69,8 +75,30 @@ def PushTask( | |
|
|
||
| # Push the task to the worker queue (wait at most 5 seconds) | ||
| if not self.worker.push_task(inflight, timeout=5): | ||
| self.worker._metrics.incr( | ||
| "taskworker.worker.push_rpc", | ||
| tags={"result": "busy", "processing_pool": self.worker._processing_pool_name}, | ||
| ) | ||
|
|
||
| self.worker._metrics.distribution( | ||
| "taskworker.worker.push_rpc.duration", | ||
| time.monotonic() - start_time, | ||
| tags={"result": "busy", "processing_pool": self.worker._processing_pool_name}, | ||
| ) | ||
|
|
||
| context.abort(grpc.StatusCode.RESOURCE_EXHAUSTED, "worker busy") | ||
|
|
||
| self.worker._metrics.incr( | ||
| "taskworker.worker.push_rpc", | ||
| tags={"result": "accepted", "processing_pool": self.worker._processing_pool_name}, | ||
| ) | ||
|
|
||
| self.worker._metrics.distribution( | ||
| "taskworker.worker.push_rpc.duration", | ||
| time.monotonic() - start_time, | ||
| tags={"result": "accepted", "processing_pool": self.worker._processing_pool_name}, | ||
| ) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Metrics after abort lack structural mutual exclusivity. Low Severity. The […] Additional Locations (1). Reviewed by Cursor Bugbot for commit fbd2453. Configure here.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Metrics being wrong when mocks are used is fine imo. The mocks should raise |
||
|
|
||
|
george-sentry marked this conversation as resolved.
|
||
| return PushTaskResponse() | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,13 +48,13 @@ pub fn bucket_range_for_fetch_thread(thread_index: usize, fetch_threads: usize) | |
| /// Thin interface for the push pool. It mostly serves to enable proper unit testing, but it also decouples fetch logic from push logic even further. | ||
| #[async_trait] | ||
| pub trait TaskPusher { | ||
| /// Push a single task to the worker service. | ||
| async fn push_task(&self, activation: InflightActivation) -> Result<(), PushError>; | ||
| /// Submit a single task to the push pool. | ||
| async fn submit_task(&self, activation: InflightActivation) -> Result<(), PushError>; | ||
| } | ||
|
|
||
| #[async_trait] | ||
| impl TaskPusher for PushPool { | ||
| async fn push_task(&self, activation: InflightActivation) -> Result<(), PushError> { | ||
| async fn submit_task(&self, activation: InflightActivation) -> Result<(), PushError> { | ||
| self.submit(activation).await | ||
| } | ||
| } | ||
|
|
@@ -123,41 +123,60 @@ impl<T: TaskPusher + Send + Sync + 'static> FetchPool<T> { | |
| .await | ||
| { | ||
| Ok(activations) if activations.is_empty() => { | ||
| metrics::counter!("fetch.empty").increment(1); | ||
| debug!("No pending activations"); | ||
|
|
||
| // Wait for pending activations to appear | ||
| backoff = true; | ||
| } | ||
|
|
||
| Ok(activations) => { | ||
| metrics::counter!("fetch.claimed") | ||
| .increment(activations.len() as u64); | ||
| metrics::histogram!("fetch.claim_batch_size") | ||
| .record(activations.len() as f64); | ||
|
|
||
| debug!("Fetched {} activations", activations.len()); | ||
|
|
||
| for activation in activations { | ||
| let id = activation.id.clone(); | ||
|
|
||
| if let Err(e) = pusher.push_task(activation).await { | ||
| match e { | ||
| PushError::Timeout => warn!( | ||
| match pusher.submit_task(activation).await { | ||
| Ok(()) => metrics::counter!("fetch.submit", "result" => "ok").increment(1), | ||
|
|
||
| Err(PushError::Timeout) => { | ||
| metrics::counter!("fetch.submit", "result" => "timeout") | ||
| .increment(1); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, fixed. I think |
||
|
|
||
| warn!( | ||
| task_id = %id, | ||
| "Submit to push pool timed out after {} milliseconds", | ||
| config.push_queue_timeout_ms | ||
| ), | ||
| ); | ||
|
|
||
| // Wait for push queue to empty | ||
| backoff = true; | ||
| } | ||
|
|
||
| PushError::Channel(e) => warn!( | ||
| Err(PushError::Channel(e)) => { | ||
| metrics::counter!("fetch.submit", "result" => "channel_error") | ||
| .increment(1); | ||
|
|
||
| warn!( | ||
| task_id = %id, | ||
| error = ?e, | ||
| "Submit to push pool failed due to channel error", | ||
| ) | ||
| } | ||
| ); | ||
|
cursor[bot] marked this conversation as resolved.
|
||
|
|
||
| backoff = true; | ||
| // Wait before trying again | ||
| backoff = true; | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| } | ||
|
sentry[bot] marked this conversation as resolved.
|
||
|
|
||
| Err(e) => { | ||
| metrics::counter!("fetch.store_error").increment(1); | ||
| warn!( | ||
| error = ?e, | ||
| "Store failed while fetching tasks" | ||
|
|
||


Uh oh!
There was an error while loading. Please reload this page.