Permalink
Browse files

Fix a few more device-loss bugs.

  • Loading branch information...
hrydgard committed Nov 10, 2017
1 parent 1c76d28 commit 378e01625c9abb05149d33439d047afbe7974e7b
Showing with 28 additions and 9 deletions.
  1. +1 −2 GPU/Vulkan/DrawEngineVulkan.cpp
  2. +0 −2 GPU/Vulkan/GPU_Vulkan.h
  3. +6 −0 UI/NativeApp.cpp
  4. +21 −5 ext/native/thin3d/VulkanRenderManager.cpp
@@ -251,10 +251,8 @@ void DrawEngineVulkan::DestroyDeviceObjects() {
vulkan_->Delete().QueueDeleteSampler(nullSampler_);
if (pipelineLayout_ != VK_NULL_HANDLE)
vulkan_->Delete().QueueDeletePipelineLayout(pipelineLayout_);
pipelineLayout_ = VK_NULL_HANDLE;
if (descriptorSetLayout_ != VK_NULL_HANDLE)
vulkan_->Delete().QueueDeleteDescriptorSetLayout(descriptorSetLayout_);
descriptorSetLayout_ = VK_NULL_HANDLE;
if (nullTexture_) {
nullTexture_->Destroy();
delete nullTexture_;
@@ -263,6 +261,7 @@ void DrawEngineVulkan::DestroyDeviceObjects() {
vertexCache_->Destroy(vulkan_);
delete vertexCache_;
vertexCache_ = nullptr;
vai_.Clear(); // Need to clear this to get rid of all remaining references to the dead buffers.
}
void DrawEngineVulkan::DeviceLost() {
View
@@ -119,8 +119,6 @@ class GPU_Vulkan : public GPUCommon {
// Manages state and pipeline objects
PipelineManagerVulkan *pipelineManager_;
int lastVsync_;
VkCommandBuffer curCmd_;
int vertexCost_ = 0;
std::string reportingPrimaryInfo_;
View
@@ -845,6 +845,12 @@ void NativeRender(GraphicsContext *graphicsContext) {
graphicsContext->Resize();
screenManager->resized();
// Do not enable unless you are testing device-loss on Windows
#if 0
screenManager->deviceLost();
screenManager->deviceRestore();
#endif
// TODO: Move this to new GraphicsContext objects for each backend.
#ifndef _WIN32
if (GetGPUBackend() == GPUBackend::OPENGL) {
@@ -194,7 +194,7 @@ void VulkanRenderManager::CreateBackbuffers() {
run_ = true;
// Won't necessarily be 0.
threadInitFrame_ = vulkan_->GetCurFrame();
ILOG("Starting Vulkan submission thread");
ILOG("Starting Vulkan submission thread (threadInitFrame_ = %d)", vulkan_->GetCurFrame());
thread_ = std::thread(&VulkanRenderManager::ThreadFunc, this);
}
}
@@ -215,12 +215,21 @@ void VulkanRenderManager::StopThread() {
}
}
thread_.join();
ILOG("Vulkan submission thread joined.");
ILOG("Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());
// Eat whatever has been queued up for this frame if anything.
Wipe();

This comment has been minimized.

Show comment
Hide comment
@unknownbrackets

unknownbrackets Nov 10, 2017

Collaborator

A race condition that could cause problems here:

join() was not blocking for me, on Windows. So after thread_.join() completed, the thread was still busy submitting commands. This could cause a crash (in my case, it caused the fence to be resignaled before the submit.) We need to wait for the thread to stop before doing things like this.

-[Unknown]

@unknownbrackets

unknownbrackets Nov 10, 2017

Collaborator

A race condition that could cause problems here:

join() was not blocking for me, on Windows. So after thread_.join() completed, the thread was still busy submitting commands. This could cause a crash (in my case, it caused the fence to be resignaled before the submit.) We need to wait for the thread to stop before doing things like this.

-[Unknown]

This comment has been minimized.

Show comment
Hide comment
@hrydgard

hrydgard Nov 10, 2017

Owner

What? join() should absolutely be blocking. Are you sure about this?

@hrydgard

hrydgard Nov 10, 2017

Owner

What? join() should absolutely be blocking. Are you sure about this?

This comment has been minimized.

Show comment
Hide comment
@hrydgard

hrydgard Nov 10, 2017

Owner

It's not just that the device kept going in the background, which I've now prevented with a vkDeviceWaitIdle?

@hrydgard

hrydgard Nov 10, 2017

Owner

It's not just that the device kept going in the background, which I've now prevented with a vkDeviceWaitIdle?

This comment has been minimized.

Show comment
Hide comment
@unknownbrackets

unknownbrackets Nov 10, 2017

Collaborator

I definitely saw this, but assumed detach had been called and didn't look into it further.

This is why 6c1f661, frameData.fence = vulkan_->CreateFence(true); happened before vkQueueSubmit on resize for me (if I resized enough times on fast forward.) The debugger showed we had already passed join.

Maybe I missed some VS update... I don't see a detach.

-[Unknown]

@unknownbrackets

unknownbrackets Nov 10, 2017

Collaborator

I definitely saw this, but assumed detach had been called and didn't look into it further.

This is why 6c1f661, frameData.fence = vulkan_->CreateFence(true); happened before vkQueueSubmit on resize for me (if I resized enough times on fast forward.) The debugger showed we had already passed join.

Maybe I missed some VS update... I don't see a detach.

-[Unknown]

This comment has been minimized.

Show comment
Hide comment
@unknownbrackets

unknownbrackets Nov 10, 2017

Collaborator

Unless I made a mistake - one thread was recreating a fence, the other was submitting. If it doesn't repro on resize anymore, then maybe it was another bug.

-[Unknown]

@unknownbrackets

unknownbrackets Nov 10, 2017

Collaborator

Unless I made a mistake - one thread was recreating a fence, the other was submitting. If it doesn't repro on resize anymore, then maybe it was another bug.

-[Unknown]

// Wait for any fences to finish and be resignaled, so we don't have sync issues.
// Also clean out any queued data, which might refer to things that might not be valid
// when we restart...
for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
auto &frameData = frameData_[i];
if (frameData.readyForRun || frameData.hasInitCommands || frameData.steps.size() != 0) {
Crash();
}
frameData.readyForRun = false;
frameData.steps.clear();
std::unique_lock<std::mutex> lock(frameData.push_mutex);
while (!frameData.readyForFence) {
@@ -277,6 +286,7 @@ void VulkanRenderManager::ThreadFunc() {
setCurrentThreadName("RenderMan");
int threadFrame = threadInitFrame_;
bool nextFrame = false;
bool firstFrame = true;
while (true) {
{
if (nextFrame) {
@@ -304,9 +314,17 @@ void VulkanRenderManager::ThreadFunc() {
assert(frameData.type == VKRRunType::END || frameData.type == VKRRunType::SYNC);
}
VLOG("PULL: Running frame %d", threadFrame);
if (firstFrame) {
ILOG("Running first frame (%d)", threadFrame);
firstFrame = false;
}
Run(threadFrame);
VLOG("PULL: Finished frame %d", threadFrame);
}
// Wait for the device to be done with everything, before tearing stuff down.
vkDeviceWaitIdle(vulkan_->GetDevice());
VLOG("PULL: Quitting");
}
@@ -374,9 +392,7 @@ void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRR
curRenderStep_ = nullptr;
}
if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
#ifdef _DEBUG
ILOG("Empty render step. Usually happens after uploading pixels..");
#endif
VLOG("Empty render step. Usually happens after uploading pixels..");
}
VKRStep *step = new VKRStep{ VKRStepType::RENDER };

0 comments on commit 378e016

Please sign in to comment.