-
Notifications
You must be signed in to change notification settings - Fork 210
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Debug CI failures, introduce task groups, error handling improvements #834
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,164 @@ | ||
use std::future::Future; | ||
use std::sync::atomic::AtomicBool; | ||
use std::sync::atomic::Ordering::SeqCst; | ||
use std::sync::Arc; | ||
use std::time::{Duration, Instant}; | ||
|
||
use anyhow::anyhow; | ||
use futures::lock::Mutex; | ||
pub use imp::*; | ||
use thiserror::Error; | ||
#[cfg(not(target_family = "wasm"))] | ||
use tokio::task::JoinHandle; | ||
use tracing::info; | ||
#[cfg(target_family = "wasm")] | ||
type JoinHandle<T> = futures::future::Ready<anyhow::Result<T>>; | ||
|
||
#[derive(Debug, Error)] | ||
#[error("deadline has elapsed")] | ||
pub struct Elapsed; | ||
|
||
#[derive(Debug, Default)] | ||
struct TaskGroupInner { | ||
/// Was the shutdown requested, either externally or due to any task failure? | ||
is_shutting_down: AtomicBool, | ||
join: Mutex<Vec<(String, JoinHandle<()>)>>, | ||
} | ||
|
||
/// A group of task working together | ||
/// | ||
/// Using this struct it is possible to spawn one or more | ||
/// main thread collabarating, which can cooperatively gracefully | ||
/// shut down, either due to external request, or failure of | ||
/// one of them. | ||
/// | ||
/// Each thread should periodically check [`TaskHandle`] or rely | ||
/// on condition like channel disconnection to detect when it is time | ||
/// to finish. | ||
#[derive(Clone, Default, Debug)] | ||
pub struct TaskGroup { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have to polish this a bit before landing, but that's the idea - instead of spawning thread willy-nilly, have a primitive that keeps track of them, and allows to signal shutdown and wait for all of them to wrap up before exiting. Passing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Really cool abstraction. I wonder if we still need a way to get output from tasks in some places? Maybe not for now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It could be done, where |
||
inner: Arc<TaskGroupInner>, | ||
} | ||
|
||
impl TaskGroup { | ||
pub fn new() -> Self { | ||
Self::default() | ||
} | ||
|
||
fn make_handle(&self) -> TaskHandle { | ||
TaskHandle { | ||
inner: self.inner.clone(), | ||
} | ||
} | ||
|
||
#[cfg(not(target_family = "wasm"))] | ||
pub async fn spawn<Fut>( | ||
&mut self, | ||
name: impl Into<String>, | ||
f: impl FnOnce(TaskHandle) -> Fut + Send + 'static, | ||
) where | ||
Fut: Future<Output = ()> + Send + 'static, | ||
{ | ||
let name = name.into(); | ||
let mut guard = TaskPanicGuard { | ||
name: name.clone(), | ||
inner: self.inner.clone(), | ||
completed: false, | ||
}; | ||
let handle = self.make_handle(); | ||
|
||
if let Some(handle) = self::imp::spawn(async move { | ||
f(handle).await; | ||
}) { | ||
self.inner.join.lock().await.push((name, handle)); | ||
} | ||
guard.completed = true; | ||
} | ||
|
||
// TODO: Send vs lack of Send bound; do something about it | ||
#[cfg(target_family = "wasm")] | ||
pub async fn spawn<Fut>( | ||
&mut self, | ||
name: impl Into<String>, | ||
f: impl FnOnce(TaskHandle) -> Fut + 'static, | ||
) where | ||
Fut: Future<Output = ()> + 'static, | ||
{ | ||
let name = name.into(); | ||
let mut guard = TaskPanicGuard { | ||
name: name.clone(), | ||
inner: self.inner.clone(), | ||
completed: false, | ||
}; | ||
let handle = self.make_handle(); | ||
|
||
if let Some(handle) = self::imp::spawn(async move { | ||
f(handle).await; | ||
}) { | ||
self.inner.join.lock().await.push((name, handle)); | ||
} | ||
guard.completed = true; | ||
} | ||
|
||
pub fn shutdown(&self) { | ||
self.inner.is_shutting_down.store(true, SeqCst); | ||
} | ||
|
||
pub async fn join_all(self) -> anyhow::Result<()> { | ||
for (name, join) in self.inner.join.lock().await.drain(..) { | ||
join.await | ||
.map_err(|e| anyhow!("Thread {name} panicked with: {e}"))? | ||
} | ||
Ok(()) | ||
} | ||
} | ||
|
||
pub struct TaskPanicGuard { | ||
name: String, | ||
inner: Arc<TaskGroupInner>, | ||
/// Did the future completed successfully (no panic) | ||
completed: bool, | ||
} | ||
|
||
impl TaskPanicGuard { | ||
pub fn is_shutting_down(&self) -> bool { | ||
self.inner.is_shutting_down.load(SeqCst) | ||
} | ||
} | ||
|
||
impl Drop for TaskPanicGuard { | ||
fn drop(&mut self) { | ||
if !self.completed { | ||
info!( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be a warning? |
||
"Task {} shut down uncleanly. Shutting down task group.", | ||
self.name | ||
); | ||
self.inner.is_shutting_down.store(true, SeqCst); | ||
} | ||
} | ||
} | ||
|
||
pub struct TaskHandle { | ||
inner: Arc<TaskGroupInner>, | ||
} | ||
|
||
impl TaskHandle { | ||
pub fn is_shutting_down(&self) -> bool { | ||
self.inner.is_shutting_down.load(SeqCst) | ||
} | ||
} | ||
|
||
#[cfg(not(target_family = "wasm"))] | ||
mod imp { | ||
pub use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; | ||
|
||
use super::*; | ||
|
||
pub fn spawn<F>(future: F) | ||
pub(crate) fn spawn<F>(future: F) -> Option<JoinHandle<()>> | ||
where | ||
F: Future<Output = ()> + Send + 'static, | ||
{ | ||
tokio::spawn(future); | ||
Some(tokio::spawn(future)) | ||
} | ||
|
||
pub fn block_in_place<F, R>(f: F) -> R | ||
|
@@ -52,12 +193,13 @@ mod imp { | |
|
||
use super::*; | ||
|
||
pub fn spawn<F>(future: F) | ||
pub(crate) fn spawn<F>(future: F) -> Option<JoinHandle<()>> | ||
where | ||
// No Send needed on wasm | ||
F: Future<Output = ()> + 'static, | ||
{ | ||
wasm_bindgen_futures::spawn_local(future) | ||
wasm_bindgen_futures::spawn_local(future); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For |
||
None | ||
} | ||
|
||
pub fn block_in_place<F, R>(f: F) -> R | ||
|
@@ -87,5 +229,3 @@ mod imp { | |
} | ||
} | ||
} | ||
|
||
pub use imp::*; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At minimum - we should have timeouts for any network operations, at least on the client side. It's better to return an error and have user retry, than just hang forever with no feedback to the user.