-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
VulkanQueueRunner.h
356 lines (310 loc) · 9.76 KB
/
VulkanQueueRunner.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
#pragma once
#include <cstdint>
#include <mutex>
#include <condition_variable>
#include "Common/Thread/Promise.h"
#include "Common/Data/Collections/Hashmaps.h"
#include "Common/Data/Collections/FastVec.h"
#include "Common/GPU/Vulkan/VulkanContext.h"
#include "Common/GPU/Vulkan/VulkanBarrier.h"
#include "Common/GPU/Vulkan/VulkanFrameData.h"
#include "Common/GPU/Vulkan/VulkanFramebuffer.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "Common/Data/Collections/TinySet.h"
#include "Common/GPU/DataFormat.h"
// Forward declarations. Within this header these types are only referred to
// through pointers or references.
class VKRFramebuffer;
struct VKRGraphicsPipeline;
struct VKRComputePipeline;
struct VKRImage;
struct VKRPipelineLayout;
struct FrameData;
// Queue hack flags. Each value occupies its own bit so several can be OR'd together.
// NOTE(review): presumably these are the values passed to VulkanQueueRunner::EnableHacks() - confirm against callers.
// Bit 2 (value 4) is not assigned to any flag here.
enum {
    QUEUE_HACK_MGS2_ACID = 1 << 0,
    QUEUE_HACK_SONIC = 1 << 1,
    QUEUE_HACK_RENDERPASS_MERGE = 1 << 3,
};
// Command kind stored in VkRenderData::cmd. Stored as a single byte.
// Enumerators are numbered consecutively from zero, so NUM_RENDER_COMMANDS
// must stay last: it doubles as the count of the entries above it.
enum class VKRRenderCommand : uint8_t {
    REMOVED = 0,
    BIND_GRAPHICS_PIPELINE,  // async
    STENCIL,
    BLEND,
    VIEWPORT,
    SCISSOR,
    CLEAR,
    DRAW,
    DRAW_INDEXED,
    PUSH_CONSTANTS,
    DEBUG_ANNOTATION,
    NUM_RENDER_COMMANDS,
};
// Per-pipeline property bits, one bit per flag so they can be combined
// (ENUM_CLASS_BITOPS below is applied to this type for that purpose).
// Bit 0 (0x01) is not assigned to any flag.
enum class PipelineFlags : u8 {
    NONE = 0x00,
    USES_BLEND_CONSTANT = 0x02,
    USES_DEPTH_STENCIL = 0x04,  // Reads or writes the depth or stencil buffers.
    USES_GEOMETRY_SHADER = 0x08,
    USES_MULTIVIEW = 0x10,  // Inherited from the render pass it was created with.
    USES_DISCARD = 0x20,
};
ENUM_CLASS_BITOPS(PipelineFlags);
// A single recorded command, as stored in VKRStep::commands.
// `cmd` is the command kind; the anonymous union carries the per-command payload.
// NOTE(review): presumably only the union member matching `cmd` is valid at any time -
// confirm against the code that fills in and consumes these.
struct VkRenderData {
VKRRenderCommand cmd;
union {
// Raw Vulkan pipeline handle plus its layout.
struct {
VkPipeline pipeline;
VKRPipelineLayout *pipelineLayout;
} pipeline;
// Graphics pipeline wrapper object plus its layout.
struct {
VKRGraphicsPipeline *pipeline;
VKRPipelineLayout *pipelineLayout;
} graphics_pipeline;
// Compute pipeline wrapper object plus its layout.
struct {
VKRComputePipeline *pipeline;
VKRPipelineLayout *pipelineLayout;
} compute_pipeline;
// Non-indexed draw parameters. Room for up to three UBO offsets.
struct {
uint32_t descSetIndex;
int numUboOffsets;
uint32_t uboOffsets[3];
VkBuffer vbuffer;
VkDeviceSize voffset;
uint32_t count;
uint32_t offset;
} draw;
// Indexed draw parameters. Note the field types differ from `draw`:
// numUboOffsets is 16-bit here and the buffer offsets are 32-bit.
struct {
uint32_t descSetIndex;
uint32_t uboOffsets[3];
uint16_t numUboOffsets;
uint16_t instances;
VkBuffer vbuffer;
VkBuffer ibuffer;
uint32_t voffset;
uint32_t ioffset;
uint32_t count;
} drawIndexed;
// Clear values and the mask of aspects to clear.
struct {
uint32_t clearColor;
float clearZ;
int clearStencil;
int clearMask; // VK_IMAGE_ASPECT_COLOR_BIT etc
} clear;
struct {
VkViewport vp;
} viewport;
struct {
VkRect2D scissor;
} scissor;
// Stencil state: write mask, compare mask and reference value, one byte each.
struct {
uint8_t stencilWriteMask;
uint8_t stencilCompareMask;
uint8_t stencilRef;
} stencil;
struct {
uint32_t color;
} blendColor;
// Push constant payload. offset and size are single bytes; the inline
// data buffer is limited to 40 bytes.
struct {
VkShaderStageFlags stages;
uint8_t offset;
uint8_t size;
uint8_t data[40]; // Should be enough for now.
} push;
// Annotation string. Only the pointer is stored here, not a copy of the text.
struct {
const char *annotation;
} debugAnnotation;
struct {
int setIndex;
} bindDescSet;
};
};
// Kind of a VKRStep (see VKRStep::stepType). Stored as a single byte,
// numbered consecutively from zero.
enum class VKRStepType : uint8_t {
    RENDER = 0,
    RENDER_SKIP,
    COPY,
    BLIT,
    READBACK,
    READBACK_IMAGE,
};
// A request to bring one aspect of a framebuffer into a given image layout.
// Stored in VKRStep::preTransitions.
struct TransitionRequest {
    VKRFramebuffer *fb;
    VkImageAspectFlags aspect;  // COLOR or DEPTH
    VkImageLayout targetLayout;

    // Two requests are equal only when the framebuffer, the aspect and the
    // target layout all match.
    bool operator == (const TransitionRequest &other) const {
        if (fb != other.fb) {
            return false;
        }
        if (aspect != other.aspect) {
            return false;
        }
        return targetLayout == other.targetLayout;
    }
};
// Forward declaration; VulkanQueueRunner below only handles VKRRenderPass through pointers.
class VKRRenderPass;
// One queued unit of work: a render pass, a copy, a blit or a readback.
// `stepType` gives the kind; the anonymous union at the end holds the parameters.
// NOTE(review): presumably only the union member matching `stepType` is valid -
// confirm against the code that creates and runs steps.
// The constructor does not initialize the union; whoever creates a step is
// expected to fill in the relevant member.
struct VKRStep {
    VKRStep(VKRStepType _type) : stepType(_type) {}
    ~VKRStep() {}

    VKRStepType stepType;
    // Commands recorded for this step.
    FastVec<VkRenderData> commands;
    TinySet<TransitionRequest, 4> preTransitions;
    TinySet<VKRFramebuffer *, 8> dependencies;
    // Optional label for the step. Defaults to null so that a step whose tag
    // was never assigned holds a well-defined value instead of an
    // indeterminate pointer.
    const char *tag = nullptr;

    union {
        struct {
            VKRFramebuffer *framebuffer;
            VKRRenderPassLoadAction colorLoad;
            VKRRenderPassLoadAction depthLoad;
            VKRRenderPassLoadAction stencilLoad;
            VKRRenderPassStoreAction colorStore;
            VKRRenderPassStoreAction depthStore;
            VKRRenderPassStoreAction stencilStore;
            u8 clearStencil;
            uint32_t clearColor;
            float clearDepth;
            int numDraws;
            // Downloads and textures from this pass.
            int numReads;
            VkImageLayout finalColorLayout;
            VkImageLayout finalDepthStencilLayout;
            // Pipeline flags for this pass.
            // NOTE(review): the previous comment said this carries a self-dependency flag named
            // USES_INPUT_ATTACHMENT, but PipelineFlags declares no such enumerator - that note was stale.
            PipelineFlags pipelineFlags;
            VkRect2D renderArea;
            // Render pass type. Deduced after finishing recording the pass, from the used pipelines.
            // NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization.
            RenderPassType renderPassType;
        } render;
        struct {
            VKRFramebuffer *src;
            VKRFramebuffer *dst;
            VkRect2D srcRect;
            VkOffset2D dstPos;
            int aspectMask;
        } copy;
        struct {
            VKRFramebuffer *src;
            VKRFramebuffer *dst;
            VkRect2D srcRect;
            VkRect2D dstRect;
            int aspectMask;
            VkFilter filter;
        } blit;
        struct {
            int aspectMask;
            VKRFramebuffer *src;
            VkRect2D srcRect;
            bool delayed;
        } readback;
        struct {
            VkImage image;
            VkRect2D srcRect;
            int mipLevel;
        } readback_image;
    };
};
// These are enqueued from the main thread,
// and the render thread pops them off
struct VKRRenderThreadTask {
    VKRRenderThreadTask(VKRRunType _runType) : runType(_runType) {}

    // Steps carried by this task. Raw pointers: this struct has no destructor
    // that frees them.
    std::vector<VKRStep *> steps;
    // Frame number for this task; -1 until assigned.
    int frame = -1;
    VKRRunType runType;

    // Avoid copying these by accident.
    // Declared with the canonical const-reference signatures so that an attempted
    // copy from a const object or a temporary also resolves to the deleted function.
    VKRRenderThreadTask(const VKRRenderThreadTask &) = delete;
    VKRRenderThreadTask &operator =(const VKRRenderThreadTask &) = delete;
};
// Runs lists of VKRStep (see RunSteps) and holds the state declared below:
// the render pass cache, the backbuffer/swapchain objects, the depth buffer
// and the synchronous readback buffer.
class VulkanQueueRunner {
public:
    VulkanQueueRunner(VulkanContext *vulkan) : vulkan_(vulkan), renderPasses_(16) {}

    // Stores the framebuffer/image pair to treat as the backbuffer.
    void SetBackbuffer(VkFramebuffer fb, VkImage img) {
        backbufferImage_ = img;
        backbuffer_ = fb;
    }

    void PreprocessSteps(std::vector<VKRStep *> &steps);
    void RunSteps(std::vector<VKRStep *> &steps, int curFrame, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps = false);
    void LogSteps(const std::vector<VKRStep *> &steps, bool verbose);

    static std::string StepToString(VulkanContext *vulkan, const VKRStep &step);

    void CreateDeviceObjects();
    void DestroyDeviceObjects();

    // Swapchain handling.
    void DestroyBackBuffers();
    bool CreateSwapchain(VkCommandBuffer cmdInit);

    // True when at least one swapchain framebuffer is present.
    bool HasBackbuffers() const {
        const bool noneCreated = framebuffers_.empty();
        return !noneCreated;
    }

    // Returns a render pass that is compatible with all of our framebuffers.
    // It is cached up front, because a lookup in the map is not possible here:
    // the caller might be on another thread.
    VKRRenderPass *GetCompatibleRenderPass() const {
        return compatibleRenderPass_;
    }

    // Flattens a (color, depth) load action pair into one index, three color slots per depth value.
    inline int RPIndex(VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth) {
        const int colorPart = static_cast<int>(color);
        const int depthPart = static_cast<int>(depth);
        return depthPart * 3 + colorPart;
    }

    // A src of 0 means: copy from the sync readback buffer.
    bool CopyReadbackBuffer(FrameData &frameData, VKRFramebuffer *src, int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels);

    VKRRenderPass *GetRenderPass(const RPKey &key);

    // Reverse lookup: walks the render pass map looking for passToFind and, on
    // a hit, writes the key it is stored under to *outKey. Returns whether it was found.
    bool GetRenderPassKey(VKRRenderPass *passToFind, RPKey *outKey) const {
        bool matched = false;
        renderPasses_.Iterate([&matched, passToFind, outKey](const RPKey &candidateKey, const VKRRenderPass *candidate) {
            if (candidate != passToFind) {
                return;
            }
            matched = true;
            *outKey = candidateKey;
        });
        return matched;
    }

    // Replaces the whole set of enabled hack bits.
    void EnableHacks(uint32_t hacks) {
        hacksEnabled_ = hacks;
    }

    // Wakes every thread currently blocked in WaitForCompileNotification().
    void NotifyCompileDone() {
        compileDone_.notify_all();
    }

    // Blocks on the compile-done condition variable. There is no predicate, so
    // this can return on a spurious wakeup; callers must re-check their own condition.
    void WaitForCompileNotification() {
        std::unique_lock<std::mutex> guard(compileDoneMutex_);
        compileDone_.wait(guard);
    }

private:
    bool InitBackbufferFramebuffers(int width, int height);
    bool InitDepthStencilBuffer(VkCommandBuffer cmd);  // Used for non-buffered rendering.

    VKRRenderPass *PerformBindFramebufferAsRenderTarget(const VKRStep &pass, VkCommandBuffer cmd);
    void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd, int curFrame);
    void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd);
    void PerformBlit(const VKRStep &pass, VkCommandBuffer cmd);
    void PerformReadback(const VKRStep &pass, VkCommandBuffer cmd, FrameData &frameData);
    void PerformReadbackImage(const VKRStep &pass, VkCommandBuffer cmd);

    void LogRenderPass(const VKRStep &pass, bool verbose);
    void LogCopy(const VKRStep &pass);
    void LogBlit(const VKRStep &pass);
    void LogReadback(const VKRStep &pass);
    void LogReadbackImage(const VKRStep &pass);

    void ResizeReadbackBuffer(CachedReadback *readback, VkDeviceSize requiredSize);

    void ApplyMGSHack(std::vector<VKRStep *> &steps);
    void ApplySonicHack(std::vector<VKRStep *> &steps);
    void ApplyRenderPassMerge(std::vector<VKRStep *> &steps);

    static void SetupTransitionToTransferSrc(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
    static void SetupTransitionToTransferDst(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);
    static void SetupTransferDstWriteAfterWrite(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrier *recordBarrier);

    VulkanContext *vulkan_;

    VkFramebuffer backbuffer_ = VK_NULL_HANDLE;
    VkImage backbufferImage_ = VK_NULL_HANDLE;

    // The "Compatible" render pass. The plan is to get rid of this soon.
    VKRRenderPass *compatibleRenderPass_ = nullptr;

    // Render passes, covering every combination of preserving, clearing or not caring about fb contents.
    // Every VKRRenderPass contains all compatibility classes (which attachments they have, etc).
    DenseHashMap<RPKey, VKRRenderPass *> renderPasses_;

    // Readback buffer. Only synchronous readback is supported at the moment, so one is really all we need.
    // It is sized generously.
    CachedReadback syncReadback_{};

    // TODO: Enable based on compat.ini.
    uint32_t hacksEnabled_ = 0;

    // Compile-done notifications.
    std::mutex compileDoneMutex_;
    std::condition_variable compileDone_;

    // Image barrier helper for use while recording command buffers (PerformRenderPass etc).
    // Kept as a member so its allocation can be reused.
    VulkanBarrier recordBarrier_;

    // Swap chain management.
    struct SwapchainImageData {
        VkImage image;
        VkImageView view;
    };
    std::vector<VkFramebuffer> framebuffers_;
    std::vector<SwapchainImageData> swapchainImages_;
    uint32_t swapchainImageCount_ = 0;

    struct DepthBufferInfo {
        VkFormat format = VK_FORMAT_UNDEFINED;
        VkImage image = VK_NULL_HANDLE;
        VmaAllocation alloc = VK_NULL_HANDLE;
        VkImageView view = VK_NULL_HANDLE;
    };
    DepthBufferInfo depth_;
};