Skip to content

Commit 0d56577

Browse files
committed
Fixed issues with profiler
1 parent 5033dc2 commit 0d56577

File tree

9 files changed

+84
-39
lines changed

9 files changed

+84
-39
lines changed

sdf_d3d12/profile_config/profile.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
{
44
"name": "drops",
55

6-
"initial_brick_size": 1.0,
6+
"initial_brick_size": 0.125,
77
"linear_increment": false,
88
"brick_size_multiplier": 0.5,
9-
"num_iterations": 4,
9+
"num_iterations": 1,
1010

1111
"camera": {
1212
"focal_point": [ 0, 5, 0 ],
@@ -19,7 +19,7 @@
1919
"initial_brick_size": 0.5,
2020
"linear_increment": false,
2121
"brick_size_multiplier": 0.5,
22-
"num_iterations": 3,
22+
"num_iterations": 1,
2323

2424
"camera": {
2525
"orbit_radius": 15.0
@@ -31,7 +31,7 @@
3131
"initial_brick_size": 1.0,
3232
"linear_increment": false,
3333
"brick_size_multiplier": 0.5,
34-
"num_iterations": 4,
34+
"num_iterations": 1,
3535

3636
"camera": {
3737
"focal_point": [ 0, -2, 0 ],

sdf_d3d12/src/Application/D3DApplication.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ bool D3DApplication::ParseCommandLineArgs(LPWSTR argv[], int argc)
120120
// Force various modes for profiling
121121
m_UseOrbitalCamera = true;
122122
m_DisableGUI = true;
123-
m_ToggleFullscreen = true;
123+
//m_ToggleFullscreen = true;
124124

125125
const auto& demoConfig = m_ProfileConfig.DemoConfigs[0];
126126

@@ -301,8 +301,8 @@ void D3DApplication::OnRender()
301301
m_GraphicsContext->UpdatePassCB(&m_Timer, &m_Camera, m_RenderFlags, m_HeatmapQuantization, m_HeatmapHueRange);
302302

303303
// Begin drawing
304-
PROFILE_DIRECT_BEGIN_PASS("Frame");
305304
m_GraphicsContext->StartDraw();
305+
PROFILE_DIRECT_BEGIN_PASS("Frame", g_D3DGraphicsContext->GetCommandList());
306306

307307
// Tell the scene that render is happening
308308
// This will update acceleration structures and other things to render the scene
@@ -316,8 +316,8 @@ void D3DApplication::OnRender()
316316
ImGui_ImplDX12_RenderDrawData(ImGui::GetDrawData(), m_GraphicsContext->GetCommandList());
317317

318318
// End draw
319+
PROFILE_DIRECT_END_PASS(g_D3DGraphicsContext->GetCommandList());
319320
m_GraphicsContext->EndDraw();
320-
PROFILE_DIRECT_END_PASS();
321321

322322
// For multiple ImGui viewports
323323
const ImGuiIO& io = ImGui::GetIO();

sdf_d3d12/src/Application/D3DApplication.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class D3DApplication : public BaseApplication
5959
std::unique_ptr<Scene> m_Scene;
6060
std::unique_ptr<Raytracer> m_Raytracer;
6161

62-
std::string m_DefaultDemo = "rain";
62+
std::string m_DefaultDemo = "drops";
6363

6464
// Flags to pass to the renderer
6565
UINT m_RenderFlags = RENDER_FLAG_DISPLAY_NORMALS;

sdf_d3d12/src/Application/Demos.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,16 +220,19 @@ RainDemo::RainDemo()
220220
m_RainDrops.resize(m_RainDropCount);
221221
for (auto& drop : m_RainDrops)
222222
{
223+
const float initialHeight = Random::Float(m_FloorHeight, m_CloudHeight);
224+
223225
drop.Mass = Random::Float(0.1f, 0.3f);
224226
drop.Radius = 0.01f;
225227
drop.BlendFactor = 0.0f;
226228

227229
drop.Position = {
228230
Random::Float(1.0f - m_Dimensions, m_Dimensions - 1.0f),
229-
m_CloudHeight,
231+
initialHeight,
230232
Random::Float(1.0f - m_Dimensions, m_Dimensions - 1.0f)
231233
};
232-
drop.Velocity = Random::Float(-5.0f, 0.0f);
234+
const float v2 = 2.0f * m_Gravity * (initialHeight - m_CloudHeight);
235+
drop.Velocity = -sqrtf(fabsf(v2));
233236
}
234237

235238
m_Clouds.resize(m_CloudCount);

sdf_d3d12/src/Renderer/Profiling/GPUProfiler.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,25 +104,25 @@ void GPUProfiler::CaptureNextFrame()
104104
}
105105
}
106106

107-
void GPUProfiler::BeginPass(GPUProfilerQueue queue, const char* name)
107+
void GPUProfiler::BeginPass(GPUProfilerQueue queue, const char* name, ID3D12GraphicsCommandList* commandList)
108108
{
109109
if (queue != m_Queue)
110110
return;
111111

112112
if (m_InCollection)
113113
{
114-
BeginPassImpl(name);
114+
BeginPassImpl(name, commandList);
115115
}
116116

117117
}
118-
void GPUProfiler::EndPass(GPUProfilerQueue queue)
118+
void GPUProfiler::EndPass(GPUProfilerQueue queue, ID3D12GraphicsCommandList* commandList)
119119
{
120120
if (queue != m_Queue)
121121
return;
122122

123123
if (m_InCollection)
124124
{
125-
EndPassImpl();
125+
EndPassImpl(commandList);
126126
}
127127
}
128128

sdf_d3d12/src/Renderer/Profiling/GPUProfiler.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ class GPUProfiler
4949
void CaptureNextFrame();
5050

5151
// For profiling macros - to be embedded in application source
52-
void BeginPass(GPUProfilerQueue queue, const char* name);
53-
void EndPass(GPUProfilerQueue queue);
52+
void BeginPass(GPUProfilerQueue queue, const char* name, ID3D12GraphicsCommandList* commandList);
53+
void EndPass(GPUProfilerQueue queue, ID3D12GraphicsCommandList* commandList);
5454

5555
// Index is an optional integer to place at the end of the range name
5656
// in the case where a range may be called in a loop but should be profiled separately
@@ -72,8 +72,8 @@ class GPUProfiler
7272

7373
virtual void CaptureNextFrameImpl() = 0;
7474

75-
virtual void BeginPassImpl(const char* name) = 0;
76-
virtual void EndPassImpl() = 0;
75+
virtual void BeginPassImpl(const char* name, ID3D12GraphicsCommandList* commandList) = 0;
76+
virtual void EndPassImpl(ID3D12GraphicsCommandList* commandList) = 0;
7777

7878
virtual void PushRangeImpl(const char* name) = 0;
7979
virtual void PushRangeImpl(const char* name, ID3D12GraphicsCommandList* commandList) = 0;
@@ -89,7 +89,7 @@ class GPUProfiler
8989
bool m_InCollection = false;
9090

9191
static constexpr double s_WarmupTime = 1.0; // Wait 1s to allow the clock to stabilize before beginning to profile.
92-
static constexpr size_t s_MaxNumRanges = 32;
92+
static constexpr size_t s_MaxNumRanges = 16;
9393
static constexpr uint16_t s_NumNestingLevels = 4;
9494

9595
LARGE_INTEGER m_ClockFreq;
@@ -106,15 +106,15 @@ class GPUProfiler
106106
#define PROFILE_CAPTURE_NEXT_FRAME() ::GPUProfiler::Get().CaptureNextFrame()
107107

108108
// Direct Queue profiling
109-
#define PROFILE_DIRECT_BEGIN_PASS(name) ::GPUProfiler::Get().BeginPass(GPUProfilerQueue::Direct, name)
110-
#define PROFILE_DIRECT_END_PASS() ::GPUProfiler::Get().EndPass(GPUProfilerQueue::Direct)
109+
#define PROFILE_DIRECT_BEGIN_PASS(name, ...) ::GPUProfiler::Get().BeginPass(GPUProfilerQueue::Direct, name, __VA_ARGS__)
110+
#define PROFILE_DIRECT_END_PASS(...) ::GPUProfiler::Get().EndPass(GPUProfilerQueue::Direct, __VA_ARGS__)
111111

112112
#define PROFILE_DIRECT_PUSH_RANGE(...) ::GPUProfiler::Get().PushRange(GPUProfilerQueue::Direct, __VA_ARGS__)
113113
#define PROFILE_DIRECT_POP_RANGE(...) ::GPUProfiler::Get().PopRange(GPUProfilerQueue::Direct, __VA_ARGS__)
114114

115115
// Compute Queue
116-
#define PROFILE_COMPUTE_BEGIN_PASS(name) ::GPUProfiler::Get().BeginPass(GPUProfilerQueue::Compute, name)
117-
#define PROFILE_COMPUTE_END_PASS() ::GPUProfiler::Get().EndPass(GPUProfilerQueue::Compute)
116+
#define PROFILE_COMPUTE_BEGIN_PASS(name, ...) ::GPUProfiler::Get().BeginPass(GPUProfilerQueue::Compute, name, __VA_ARGS__)
117+
#define PROFILE_COMPUTE_END_PASS(...) ::GPUProfiler::Get().EndPass(GPUProfilerQueue::Compute, __VA_ARGS__)
118118

119119
#define PROFILE_COMPUTE_PUSH_RANGE(...) ::GPUProfiler::Get().PushRange(GPUProfilerQueue::Compute, __VA_ARGS__)
120120
#define PROFILE_COMPUTE_POP_RANGE(...) ::GPUProfiler::Get().PopRange(GPUProfilerQueue::Compute, __VA_ARGS__)
@@ -123,14 +123,14 @@ class GPUProfiler
123123

124124
#define PROFILE_CAPTURE_NEXT_FRAME()
125125

126-
#define PROFILE_DIRECT_BEGIN_PASS(name)
127-
#define PROFILE_DIRECT_END_PASS()
126+
#define PROFILE_DIRECT_BEGIN_PASS(name, ...)
127+
#define PROFILE_DIRECT_END_PASS(...)
128128

129129
#define PROFILE_DIRECT_PUSH_RANGE(...)
130130
#define PROFILE_DIRECT_POP_RANGE(...)
131131

132-
#define PROFILE_COMPUTE_BEGIN_PASS(name)
133-
#define PROFILE_COMPUTE_END_PASS()
132+
#define PROFILE_COMPUTE_BEGIN_PASS(name, ...)
133+
#define PROFILE_COMPUTE_END_PASS(...)
134134

135135
#define PROFILE_COMPUTE_PUSH_RANGE(...)
136136
#define PROFILE_COMPUTE_POP_RANGE(...)

sdf_d3d12/src/Renderer/Profiling/NvGPUProfiler.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ void NvGPUProfiler::Init(ID3D12Device* device, ID3D12CommandQueue* queue, const
8989
THROW_IF_FALSE(m_Profiler.BeginSession(queue, sessionOptions), "Failed to begin session.");
9090
}
9191

92+
m_RangeCommands.Initialize(m_Device);
93+
9294
THROW_IF_FALSE(m_Profiler.EnqueueCounterCollection(m_CounterConfiguration, s_NumNestingLevels), "Failed to enqueue counter collection.");
9395

9496
LOG_INFO("NV Perf GPUProfiler created successfully");
@@ -102,44 +104,63 @@ void NvGPUProfiler::CaptureNextFrameImpl()
102104
}
103105

104106

105-
void NvGPUProfiler::BeginPassImpl(const char* name)
107+
void NvGPUProfiler::BeginPassImpl(const char* name, ID3D12GraphicsCommandList* commandList)
106108
{
107109
if (!m_Profiler.AllPassesSubmitted())
108110
{
109111
THROW_IF_FALSE(m_Profiler.BeginPass(), "Failed to begin a pass.");
110-
PushRangeImpl(name);
112+
PushRangeImpl(name, commandList);
111113
}
112114
}
113115

114-
void NvGPUProfiler::EndPassImpl()
116+
void NvGPUProfiler::EndPassImpl(ID3D12GraphicsCommandList* commandList)
115117
{
116118
if (!m_Profiler.AllPassesSubmitted() && m_Profiler.IsInPass())
117119
{
118-
PopRangeImpl(); // Frame
120+
//PopRangeImpl(commandList); // Frame
119121
THROW_IF_FALSE(m_Profiler.EndPass(), "Failed to end a pass.");
120122

121123
m_DataReady = true;
122124
}
125+
/*
126+
nv::perf::profiler::DecodeResult decodeResult;
127+
THROW_IF_FALSE(m_Profiler.DecodeCounters(decodeResult), "Failed to decode counters.");
128+
if (decodeResult.allStatisticalSamplesCollected)
129+
{
130+
THROW_IF_FALSE(nv::perf::MetricsEvaluatorSetDeviceAttributes(m_MetricsEvaluator, decodeResult.counterDataImage.data(), decodeResult.counterDataImage.size()), "Failed MetricsEvaluatorSetDeviceAttributes.");
131+
132+
const size_t numRanges = nv::perf::CounterDataGetNumRanges(decodeResult.counterDataImage.data());
133+
LOG_WARN(numRanges);
134+
135+
m_InCollection = false;
136+
}
137+
*/
123138
}
124139

125140
void NvGPUProfiler::PushRangeImpl(const char* name)
126141
{
127-
THROW_IF_FALSE(m_Profiler.PushRange(name), "Failed to push a range");
142+
if (m_InCollection)
143+
{
144+
THROW_IF_FALSE(m_Profiler.PushRange(name), "Failed to push a range");
145+
}
128146
}
129147

130148
void NvGPUProfiler::PushRangeImpl(const char* name, ID3D12GraphicsCommandList* commandList)
131149
{
132-
THROW_IF_FALSE(nv::perf::profiler::D3D12PushRange(commandList, name), "Failed to push a range");
150+
THROW_IF_FALSE(m_RangeCommands.PushRange(commandList, name), "Failed to push a range");
133151
}
134152

135153
void NvGPUProfiler::PopRangeImpl()
136154
{
137-
THROW_IF_FALSE(m_Profiler.PopRange(), "Failed to pop a range");
155+
if (m_InCollection)
156+
{
157+
THROW_IF_FALSE(m_Profiler.PopRange(), "Failed to pop a range");
158+
}
138159
}
139160

140161
void NvGPUProfiler::PopRangeImpl(ID3D12GraphicsCommandList* commandList)
141162
{
142-
THROW_IF_FALSE(nv::perf::profiler::D3D12PopRange(commandList), "Failed to pop a range");
163+
THROW_IF_FALSE(m_RangeCommands.PopRange(commandList), "Failed to pop a range");
143164
}
144165

145166

@@ -158,6 +179,7 @@ bool NvGPUProfiler::DecodeData(std::vector<std::stringstream>& outMetrics)
158179
const size_t numRanges = nv::perf::CounterDataGetNumRanges(decodeResult.counterDataImage.data());
159180
outMetrics.resize(numRanges);
160181
std::vector<double> metricValues(m_MetricEvalRequests.size());
182+
LOG_WARN(numRanges);
161183

162184
for (size_t rangeIndex = 0; rangeIndex < numRanges; rangeIndex++)
163185
{

sdf_d3d12/src/Renderer/Profiling/NvGPUProfiler.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ class NvGPUProfiler : public GPUProfiler
3131

3232
virtual void CaptureNextFrameImpl() override;
3333

34-
virtual void BeginPassImpl(const char* name) override;
35-
virtual void EndPassImpl() override;
34+
virtual void BeginPassImpl(const char* name, ID3D12GraphicsCommandList* commandList) override;
35+
virtual void EndPassImpl(ID3D12GraphicsCommandList* commandList) override;
3636

3737
virtual void PushRangeImpl(const char* name) override;
3838
virtual void PushRangeImpl(const char* name, ID3D12GraphicsCommandList* commandList) override;
@@ -51,10 +51,12 @@ class NvGPUProfiler : public GPUProfiler
5151
size_t m_DeviceIndex = 0;
5252

5353
nv::perf::profiler::RangeProfilerD3D12 m_Profiler;
54+
nv::perf::profiler::D3D12RangeCommands m_RangeCommands;
5455

5556
NVPW_Device_ClockStatus m_ClockStatus = NVPW_DEVICE_CLOCK_STATUS_UNKNOWN; // Used to restore clock state when exiting.
5657

5758
bool m_DataReady = false;
59+
std::queue<nv::perf::profiler::DecodeResult> m_DecodeResults;
5860
};
5961

6062
#endif

sdf_d3d12/src/SDF/Factory/SDFFactoryHierarchical.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,6 @@ void SDFFactoryHierarchical::PerformSDFBake_CPUBlocking(const std::wstring& pipe
422422
}
423423

424424
PIXBeginEvent(m_CommandList.Get(), PIX_COLOR_INDEX(40), L"SDF Bake");
425-
PROFILE_COMPUTE_BEGIN_PASS("SDF Bake");
426425

427426
PIXBeginEvent(PIX_COLOR_INDEX(51), L"Set up resources");
428427
{
@@ -441,6 +440,8 @@ void SDFFactoryHierarchical::PerformSDFBake_CPUBlocking(const std::wstring& pipe
441440
}
442441
PIXEndEvent();
443442

443+
PROFILE_COMPUTE_BEGIN_PASS("SDF Bake", m_CommandList.Get());
444+
444445
BuildCommandList_Setup(pipelineSet, object, m_Resources);
445446
BuildCommandList_HierarchicalBrickBuilding(pipelineSet, object, m_Resources, maxIterations);
446447

@@ -479,7 +480,6 @@ void SDFFactoryHierarchical::PerformSDFBake_CPUBlocking(const std::wstring& pipe
479480

480481
BuildCommandList_BrickEvaluation(pipelineSet, object, m_Resources);
481482

482-
PROFILE_COMPUTE_END_PASS();
483483
PIXEndEvent(m_CommandList.Get()); // SDF Bake
484484
{
485485
// Execute command list
@@ -489,6 +489,24 @@ void SDFFactoryHierarchical::PerformSDFBake_CPUBlocking(const std::wstring& pipe
489489

490490
computeQueue->WaitForFenceCPUBlocking(m_PreviousWorkFence);
491491
}
492+
493+
#ifdef ENABLE_INSTRUMENTATION
494+
// For whatever reason, end pass needs to be in a separate command list to receive data from the second command list (brick eval)
495+
// But for my purposes I don't need to bother fixing it
496+
// The profiling data gathered will still be accurate
497+
{
498+
THROW_IF_FAIL(m_CommandAllocator->Reset());
499+
THROW_IF_FAIL(m_CommandList->Reset(m_CommandAllocator.Get(), nullptr));
500+
501+
PROFILE_COMPUTE_END_PASS(m_CommandList.Get());
502+
503+
THROW_IF_FAIL(m_CommandList->Close());
504+
ID3D12CommandList* ppCommandLists[] = { m_CommandList.Get() };
505+
m_PreviousWorkFence = computeQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
506+
507+
computeQueue->WaitForFenceCPUBlocking(m_PreviousWorkFence);
508+
}
509+
#endif
492510
}
493511

494512
void SDFFactoryHierarchical::BuildCommandList_Setup(const PipelineSet& pipeline, SDFObject* object, SDFConstructionResources& resources) const

0 commit comments

Comments
 (0)