Skip to content

Commit

Permalink
Shorten lock durations to avoid deadlocks
Browse files Browse the repository at this point in the history
  • Loading branch information
SludgePhD committed Nov 18, 2023
1 parent 0e437be commit 724fc1a
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 22 deletions.
14 changes: 9 additions & 5 deletions wgpu-core/src/device/global.rs
Original file line number Diff line number Diff line change
Expand Up @@ -734,11 +734,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
if let resource::TextureInner::Native { ref raw } = *texture.inner().as_ref().unwrap() {
if !raw.is_none() {
let temp = queue::TempResource::Texture(texture.clone());
let mut pending_writes = device.pending_writes.lock();
let pending_writes = pending_writes.as_mut().unwrap();
let mut guard = device.pending_writes.lock();
let pending_writes = guard.as_mut().unwrap();
if pending_writes.dst_textures.contains_key(&texture_id) {
pending_writes.temp_resources.push(temp);
} else {
drop(guard);
device
.lock_life()
.schedule_resource_destruction(temp, last_submit_index);
Expand All @@ -763,7 +764,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {

let device = &texture.device;
{
let mut life_lock = device.lock_life();
if device
.pending_writes
.lock()
Expand All @@ -772,9 +772,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.dst_textures
.contains_key(&texture_id)
{
life_lock.future_suspected_textures.push(texture.clone());
device
.lock_life()
.future_suspected_textures
.push(texture.clone());
} else {
life_lock
device
.lock_life()
.suspected_resources
.insert(texture_id, texture.clone());
}
Expand Down
4 changes: 2 additions & 2 deletions wgpu-core/src/device/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1147,9 +1147,9 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
// a temporary one, since the chains are not finished.
let mut temp_suspected = device.temp_suspected.lock();
{
let mut suspected = temp_suspected.take().unwrap();
let mut suspected =
temp_suspected.replace(ResourceMaps::new::<A>()).unwrap();
suspected.clear();
temp_suspected.replace(ResourceMaps::new::<A>());
}

// finish all the command buffers first
Expand Down
34 changes: 19 additions & 15 deletions wgpu-core/src/device/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,18 +312,19 @@ impl<A: HalApi> Device<A> {
) -> Result<(UserClosures, bool), WaitIdleError> {
profiling::scope!("Device::maintain");
{
let mut life_tracker = self.lock_life();

// Normally, `temp_suspected` exists only to save heap
// allocations: it's cleared at the start of the function
// call, and cleared by the end. But `Global::queue_submit` is
// fallible; if it exits early, it may leave some resources in
// `temp_suspected`.
{
let temp_suspected = self.temp_suspected.lock().take().unwrap();
life_tracker.suspected_resources.extend(temp_suspected);
}
self.temp_suspected.lock().replace(ResourceMaps::new::<A>());
let temp_suspected = self
.temp_suspected
.lock()
.replace(ResourceMaps::new::<A>())
.unwrap();

let mut life_tracker = self.lock_life();
life_tracker.suspected_resources.extend(temp_suspected);

life_tracker.triage_suspected(
hub,
Expand Down Expand Up @@ -397,7 +398,11 @@ impl<A: HalApi> Device<A> {
}

pub(crate) fn untrack(&self, trackers: &Tracker<A>) {
let mut temp_suspected = self.temp_suspected.lock().take().unwrap();
let mut temp_suspected = self
.temp_suspected
.lock()
.replace(ResourceMaps::new::<A>())
.unwrap();
temp_suspected.clear();
// As the tracker is cleared/dropped, we need to consider all the resources
// that it references for destruction in the next GC pass.
Expand Down Expand Up @@ -444,7 +449,6 @@ impl<A: HalApi> Device<A> {
}
}
self.lock_life().suspected_resources.extend(temp_suspected);
self.temp_suspected.lock().replace(ResourceMaps::new::<A>());
}

pub(crate) fn create_buffer(
Expand Down Expand Up @@ -3203,8 +3207,8 @@ impl<A: HalApi> Device<A> {
&self,
submission_index: SubmissionIndex,
) -> Result<(), WaitIdleError> {
let fence = self.fence.read();
let fence = fence.as_ref().unwrap();
let guard = self.fence.read();
let fence = guard.as_ref().unwrap();
let last_done_index = unsafe {
self.raw
.as_ref()
Expand All @@ -3221,6 +3225,7 @@ impl<A: HalApi> Device<A> {
.wait(fence, submission_index, !0)
.map_err(DeviceError::from)?
};
drop(guard);
let closures = self.lock_life().triage_submissions(
submission_index,
self.command_allocator.lock().as_mut().unwrap(),
Expand Down Expand Up @@ -3277,9 +3282,8 @@ impl<A: HalApi> Device<A> {
self.valid.store(false, Ordering::Release);

// 1) Resolve the GPUDevice device.lost promise.
let mut life_tracker = self.lock_life();
if life_tracker.device_lost_closure.is_some() {
let device_lost_closure = life_tracker.device_lost_closure.take().unwrap();
let closure = self.lock_life().device_lost_closure.take();
if let Some(device_lost_closure) = closure {
device_lost_closure.call(DeviceLostReason::Unknown, message.to_string());
}

Expand Down Expand Up @@ -3310,7 +3314,6 @@ impl<A: HalApi> Device<A> {
/// Wait for idle and remove resources that we can, before we die.
pub(crate) fn prepare_to_die(&self) {
self.pending_writes.lock().as_mut().unwrap().deactivate();
let mut life_tracker = self.lock_life();
let current_index = self.active_submission_index.load(Ordering::Relaxed);
if let Err(error) = unsafe {
let fence = self.fence.read();
Expand All @@ -3322,6 +3325,7 @@ impl<A: HalApi> Device<A> {
} {
log::error!("failed to wait for the device: {:?}", error);
}
let mut life_tracker = self.lock_life();
let _ = life_tracker.triage_submissions(
current_index,
self.command_allocator.lock().as_mut().unwrap(),
Expand Down

0 comments on commit 724fc1a

Please sign in to comment.