From b6ccc7031e2fc0c5c1bf5d052c662b762df01be7 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Sun, 8 Jan 2023 12:07:04 +0100 Subject: [PATCH 1/2] Statistics: Add offset to ReportPresent/ReportComposed It is needed for accurate timing with Vulkan. --- alvr/server/cpp/alvr_server/alvr_server.cpp | 4 ++-- alvr/server/cpp/alvr_server/bindings.h | 4 ++-- alvr/server/cpp/platform/linux/CEncoder.cpp | 4 ++-- .../cpp/platform/win32/OvrDirectModeComponent.cpp | 4 ++-- alvr/server/src/lib.rs | 14 ++++++++++---- alvr/server/src/statistics.rs | 8 ++++---- 6 files changed, 22 insertions(+), 16 deletions(-) diff --git a/alvr/server/cpp/alvr_server/alvr_server.cpp b/alvr/server/cpp/alvr_server/alvr_server.cpp index ac9fd027cf..5479071b9d 100644 --- a/alvr/server/cpp/alvr_server/alvr_server.cpp +++ b/alvr/server/cpp/alvr_server/alvr_server.cpp @@ -181,8 +181,8 @@ void (*VideoSend)(VideoFrame header, unsigned char *buf, int len); void (*HapticsSend)(unsigned long long path, float duration_s, float frequency, float amplitude); void (*ShutdownRuntime)(); unsigned long long (*PathStringToHash)(const char *path); -void (*ReportPresent)(unsigned long long timestamp_ns); -void (*ReportComposed)(unsigned long long timestamp_ns); +void (*ReportPresent)(unsigned long long timestamp_ns, unsigned long long offset_ns); +void (*ReportComposed)(unsigned long long timestamp_ns, unsigned long long offset_ns); void (*ReportEncoded)(unsigned long long timestamp_ns); void (*ReportFecFailure)(int percentage); diff --git a/alvr/server/cpp/alvr_server/bindings.h b/alvr/server/cpp/alvr_server/bindings.h index 0531f42e8d..9cddb4871a 100644 --- a/alvr/server/cpp/alvr_server/bindings.h +++ b/alvr/server/cpp/alvr_server/bindings.h @@ -130,8 +130,8 @@ extern "C" void (*HapticsSend)(unsigned long long path, float amplitude); extern "C" void (*ShutdownRuntime)(); extern "C" unsigned long long (*PathStringToHash)(const char *path); -extern "C" void (*ReportPresent)(unsigned long long timestamp_ns); -extern "C" void (*ReportComposed)(unsigned long long timestamp_ns); +extern "C" void (*ReportPresent)(unsigned long long timestamp_ns, unsigned long long offset_ns); +extern "C" void (*ReportComposed)(unsigned long long timestamp_ns, unsigned long long offset_ns); extern "C" void (*ReportEncoded)(unsigned long long timestamp_ns); extern "C" void (*ReportFecFailure)(int percentage); diff --git a/alvr/server/cpp/platform/linux/CEncoder.cpp b/alvr/server/cpp/platform/linux/CEncoder.cpp index 298fb39162..0cfbda1b34 100644 --- a/alvr/server/cpp/platform/linux/CEncoder.cpp +++ b/alvr/server/cpp/platform/linux/CEncoder.cpp @@ -239,7 +239,7 @@ void CEncoder::Run() { } // Close enough to present - ReportPresent(pose->targetTimestampNs); + ReportPresent(pose->targetTimestampNs, 0); if (m_captureFrame) { m_captureFrame = false; @@ -250,7 +250,7 @@ void CEncoder::Run() { render.Render(frame_info.image, frame_info.semaphore_value); - ReportComposed(pose->targetTimestampNs); + ReportComposed(pose->targetTimestampNs, 0); encode_pipeline->PushFrame(pose->targetTimestampNs, m_scheduler.CheckIDRInsertion()); diff --git a/alvr/server/cpp/platform/win32/OvrDirectModeComponent.cpp b/alvr/server/cpp/platform/win32/OvrDirectModeComponent.cpp index 5ac542086f..20ed92e547 100644 --- a/alvr/server/cpp/platform/win32/OvrDirectModeComponent.cpp +++ b/alvr/server/cpp/platform/win32/OvrDirectModeComponent.cpp @@ -155,7 +155,7 @@ void OvrDirectModeComponent::SubmitLayer(const SubmitLayerPerEye_t(&perEye)[2]) /** Submits queued layers for display. */ void OvrDirectModeComponent::Present(vr::SharedTextureHandle_t syncTexture) { - ReportPresent(m_targetTimestampNs); + ReportPresent(m_targetTimestampNs, 0); bool useMutex = true; @@ -203,7 +203,7 @@ void OvrDirectModeComponent::Present(vr::SharedTextureHandle_t syncTexture) } } - ReportComposed(m_targetTimestampNs); + ReportComposed(m_targetTimestampNs, 0); if (m_pEncoder) { m_pEncoder->NewFrameReady(); diff --git a/alvr/server/src/lib.rs b/alvr/server/src/lib.rs index d6420af7d9..6b4001f4e5 100644 --- a/alvr/server/src/lib.rs +++ b/alvr/server/src/lib.rs @@ -425,15 +425,21 @@ pub unsafe extern "C" fn HmdDriverFactory( alvr_common::hash_string(CStr::from_ptr(path).to_str().unwrap()) } - extern "C" fn report_present(timestamp_ns: u64) { + extern "C" fn report_present(timestamp_ns: u64, offset_ns: u64) { if let Some(stats) = &mut *STATISTICS_MANAGER.lock() { - stats.report_frame_present(Duration::from_nanos(timestamp_ns)); + stats.report_frame_present( + Duration::from_nanos(timestamp_ns), + Duration::from_nanos(offset_ns), + ); } } - extern "C" fn report_composed(timestamp_ns: u64) { + extern "C" fn report_composed(timestamp_ns: u64, offset_ns: u64) { if let Some(stats) = &mut *STATISTICS_MANAGER.lock() { - stats.report_frame_composed(Duration::from_nanos(timestamp_ns)); + stats.report_frame_composed( + Duration::from_nanos(timestamp_ns), + Duration::from_nanos(offset_ns), + ); } } diff --git a/alvr/server/src/statistics.rs b/alvr/server/src/statistics.rs index 705ff878b4..2f495cf274 100644 --- a/alvr/server/src/statistics.rs +++ b/alvr/server/src/statistics.rs @@ -89,13 +89,13 @@ impl StatisticsManager { } } - pub fn report_frame_present(&mut self, target_timestamp: Duration) { + pub fn report_frame_present(&mut self, target_timestamp: Duration, offset: Duration) { if let Some(frame) = self .history_buffer .iter_mut() .find(|frame| frame.target_timestamp == target_timestamp) { - let now = Instant::now(); + let now = Instant::now() - offset; self.last_frame_present_interval = now.saturating_duration_since(self.last_frame_present_instant); @@ -105,13 +105,13 @@ impl StatisticsManager { } } - pub fn report_frame_composed(&mut self, target_timestamp: Duration) { + pub fn report_frame_composed(&mut self, target_timestamp: Duration, offset: Duration) { if let Some(frame) = self .history_buffer .iter_mut() .find(|frame| frame.target_timestamp == target_timestamp) { - frame.frame_composed = Instant::now(); + frame.frame_composed = Instant::now() - offset; } } From 7ca8bf5e0c16dde4fc22ca2283e599bf1415f554 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Wed, 11 Jan 2023 12:47:28 +0100 Subject: [PATCH 2/2] Vulkan: Use timestamp queries to get more accurate latency values Server compositor no longer includes time spent waiting on game to finish rendering frames. --- alvr/server/cpp/platform/linux/CEncoder.cpp | 11 +++--- .../cpp/platform/linux/EncodePipeline.cpp | 5 +++ .../cpp/platform/linux/EncodePipeline.h | 2 ++ .../cpp/platform/linux/EncodePipelineSW.cpp | 1 + .../cpp/platform/linux/FormatConverter.cpp | 19 ++++++++++ .../cpp/platform/linux/FormatConverter.h | 3 ++ alvr/server/cpp/platform/linux/Renderer.cpp | 36 +++++++++++++++++++ alvr/server/cpp/platform/linux/Renderer.h | 11 ++++++ .../cpp/platform/linux/ffmpeg_helper.cpp | 3 +- 9 files changed, 85 insertions(+), 6 deletions(-) diff --git a/alvr/server/cpp/platform/linux/CEncoder.cpp b/alvr/server/cpp/platform/linux/CEncoder.cpp index 0cfbda1b34..c583d742ba 100644 --- a/alvr/server/cpp/platform/linux/CEncoder.cpp +++ b/alvr/server/cpp/platform/linux/CEncoder.cpp @@ -238,9 +238,6 @@ void CEncoder::Run() { continue; } - // Close enough to present - ReportPresent(pose->targetTimestampNs, 0); - if (m_captureFrame) { m_captureFrame = false; render.Wait(frame_info.image, frame_info.semaphore_value); @@ -250,8 +247,6 @@ void CEncoder::Run() { render.Render(frame_info.image, frame_info.semaphore_value); - ReportComposed(pose->targetTimestampNs, 0); - encode_pipeline->PushFrame(pose->targetTimestampNs, m_scheduler.CheckIDRInsertion()); static_assert(sizeof(frame_info.pose) == sizeof(vr::HmdMatrix34_t&)); @@ -263,6 +258,12 @@ void CEncoder::Run() { continue; } + auto timestamps = render.GetTimestamps(); + uint64_t timestamp_present = timestamps.renderBegin; + uint64_t timestamp_composed = encode_pipeline->GetTimestamp() ? encode_pipeline->GetTimestamp() : timestamps.renderComplete; + ReportPresent(pose->targetTimestampNs, timestamps.now - timestamp_present); + ReportComposed(pose->targetTimestampNs, timestamps.now - timestamp_composed); + m_listener->SendVideo(encoded_data.data(), encoded_data.size(), pts); m_listener->GetStatistics()->EncodeOutput(); diff --git a/alvr/server/cpp/platform/linux/EncodePipeline.cpp b/alvr/server/cpp/platform/linux/EncodePipeline.cpp index 50d13d57b6..570575b305 100644 --- a/alvr/server/cpp/platform/linux/EncodePipeline.cpp +++ b/alvr/server/cpp/platform/linux/EncodePipeline.cpp @@ -125,3 +125,8 @@ bool alvr::EncodePipeline::GetEncoded(std::vector &out, uint64_t *pts) av_packet_free(&enc_pkt); return true; } + +uint64_t alvr::EncodePipeline::GetTimestamp() +{ + return gpu_timestamp; +} diff --git a/alvr/server/cpp/platform/linux/EncodePipeline.h b/alvr/server/cpp/platform/linux/EncodePipeline.h index 7f9647cb75..af057c8d9a 100644 --- a/alvr/server/cpp/platform/linux/EncodePipeline.h +++ b/alvr/server/cpp/platform/linux/EncodePipeline.h @@ -21,11 +21,13 @@ class EncodePipeline virtual void PushFrame(uint64_t targetTimestampNs, bool idr) = 0; virtual bool GetEncoded(std::vector & out, uint64_t *pts); + virtual uint64_t GetTimestamp(); virtual void SetBitrate(int64_t bitrate); static std::unique_ptr Create(Renderer *render, VkContext &vk_ctx, VkFrame &input_frame, VkFrameCtx &vk_frame_ctx, uint32_t width, uint32_t height); protected: AVCodecContext *encoder_ctx = nullptr; //shall be initialized by child class + uint64_t gpu_timestamp = 0; }; } diff --git a/alvr/server/cpp/platform/linux/EncodePipelineSW.cpp b/alvr/server/cpp/platform/linux/EncodePipelineSW.cpp index 5cea15b5f7..ee2059cfaf 100644 --- a/alvr/server/cpp/platform/linux/EncodePipelineSW.cpp +++ b/alvr/server/cpp/platform/linux/EncodePipelineSW.cpp @@ -116,6 +116,7 @@ alvr::EncodePipelineSW::~EncodePipelineSW() void alvr::EncodePipelineSW::PushFrame(uint64_t targetTimestampNs, bool idr) { rgbtoyuv->Convert(encoder_frame->data, encoder_frame->linesize); + gpu_timestamp = rgbtoyuv->GetTimestamp(); encoder_frame->pict_type = idr ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_NONE; encoder_frame->pts = targetTimestampNs; diff --git a/alvr/server/cpp/platform/linux/FormatConverter.cpp b/alvr/server/cpp/platform/linux/FormatConverter.cpp index 48365c2e39..46147f3792 100644 --- a/alvr/server/cpp/platform/linux/FormatConverter.cpp +++ b/alvr/server/cpp/platform/linux/FormatConverter.cpp @@ -16,6 +16,7 @@ FormatConverter::~FormatConverter() } vkDestroySampler(r->m_dev, m_sampler, nullptr); + vkDestroyQueryPool(r->m_dev, m_queryPool, nullptr); vkDestroyCommandPool(r->m_dev, m_commandPool, nullptr); vkDestroyDescriptorSetLayout(r->m_dev, m_descriptorLayout, nullptr); vkDestroyImageView(r->m_dev, m_view, nullptr); @@ -40,6 +41,13 @@ void FormatConverter::init(VkImage image, VkImageCreateInfo imageCreateInfo, int samplerInfo.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; VK_CHECK(vkCreateSampler(r->m_dev, &samplerInfo, nullptr, &m_sampler)); + // Timestamp query + VkQueryPoolCreateInfo queryPoolInfo = {}; + queryPoolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + queryPoolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + queryPoolInfo.queryCount = 1; + VK_CHECK(vkCreateQueryPool(r->m_dev, &queryPoolInfo, nullptr, &m_queryPool)); + // Command buffer VkCommandPoolCreateInfo cmdPoolInfo = {}; cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; @@ -231,10 +239,14 @@ void FormatConverter::Convert(uint8_t **data, int *linesize) commandBufferBegin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; VK_CHECK(vkBeginCommandBuffer(m_commandBuffer, &commandBufferBegin)); + vkCmdResetQueryPool(m_commandBuffer, m_queryPool, 0, 1); + vkCmdBindPipeline(m_commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline); vkCmdBindDescriptorSets(m_commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &m_descriptor, 0, nullptr); vkCmdDispatch(m_commandBuffer, m_groupCountX, m_groupCountY, 1); + vkCmdWriteTimestamp(m_commandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_queryPool, 0); + vkEndCommandBuffer(m_commandBuffer); VkSubmitInfo submitInfo = {}; @@ -252,6 +264,13 @@ void FormatConverter::Convert(uint8_t **data, int *linesize) } } +uint64_t FormatConverter::GetTimestamp() +{ + uint64_t query; + VK_CHECK(vkGetQueryPoolResults(r->m_dev, m_queryPool, 0, 1, sizeof(uint64_t), &query, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT)); + return query * r->m_timestampPeriod; +} + RgbToYuv420::RgbToYuv420(Renderer *render, VkImage image, VkImageCreateInfo imageInfo) : FormatConverter(render) { diff --git a/alvr/server/cpp/platform/linux/FormatConverter.h b/alvr/server/cpp/platform/linux/FormatConverter.h index e70d52df89..17cafd09f3 100644 --- a/alvr/server/cpp/platform/linux/FormatConverter.h +++ b/alvr/server/cpp/platform/linux/FormatConverter.h @@ -9,6 +9,8 @@ class FormatConverter void Convert(uint8_t **data, int *linesize); + uint64_t GetTimestamp(); + protected: struct OutputImage { VkImage image; @@ -23,6 +25,7 @@ class FormatConverter Renderer *r; VkSampler m_sampler; + VkQueryPool m_queryPool; VkCommandPool m_commandPool; VkCommandBuffer m_commandBuffer; VkDescriptorSet m_descriptor; diff --git a/alvr/server/cpp/platform/linux/Renderer.cpp b/alvr/server/cpp/platform/linux/Renderer.cpp index bdd44cad0d..4bf4c36eda 100644 --- a/alvr/server/cpp/platform/linux/Renderer.cpp +++ b/alvr/server/cpp/platform/linux/Renderer.cpp @@ -52,7 +52,12 @@ Renderer::Renderer(const VkInstance &inst, const VkDevice &dev, const VkPhysical VK_LOAD_PFN(vkImportSemaphoreFdKHR); VK_LOAD_PFN(vkGetMemoryFdKHR); VK_LOAD_PFN(vkGetImageDrmFormatModifierPropertiesEXT); + VK_LOAD_PFN(vkGetCalibratedTimestampsEXT); #undef VK_LOAD_PFN + + VkPhysicalDeviceProperties props = {}; + vkGetPhysicalDeviceProperties(m_physDev, &props); + m_timestampPeriod = props.limits.timestampPeriod; } Renderer::~Renderer() @@ -76,6 +81,7 @@ Renderer::~Renderer() vkFreeMemory(m_dev, m_output.memory, nullptr); vkDestroyFramebuffer(m_dev, m_output.framebuffer, nullptr); + vkDestroyQueryPool(m_dev, m_queryPool, nullptr); vkDestroyCommandPool(m_dev, m_commandPool, nullptr); vkDestroySampler(m_dev, m_sampler, nullptr); vkDestroyBuffer(m_dev, m_vertexBuffer, nullptr); @@ -95,6 +101,13 @@ void Renderer::Startup(uint32_t width, uint32_t height, VkFormat format) vkGetDeviceQueue(m_dev, m_queueFamilyIndex, 0, &m_queue); vkGetDeviceQueue(m_dev, m_queueFamilyIndexCompute, 0, &m_queueCompute); + // Timestamp query + VkQueryPoolCreateInfo queryPoolInfo = {}; + queryPoolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + queryPoolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + queryPoolInfo.queryCount = 2; + VK_CHECK(vkCreateQueryPool(m_dev, &queryPoolInfo, nullptr, &m_queryPool)); + // Command buffer VkCommandPoolCreateInfo cmdPoolInfo = {}; cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; @@ -575,6 +588,9 @@ void Renderer::Render(uint32_t index, uint64_t waitValue) commandBufferBegin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; VK_CHECK(vkBeginCommandBuffer(m_commandBuffer, &commandBufferBegin)); + vkCmdResetQueryPool(m_commandBuffer, m_queryPool, 0, 2); + vkCmdWriteTimestamp(m_commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_queryPool, 0); + for (size_t i = 0; i < m_pipelines.size(); ++i) { VkRect2D rect = {}; VkDescriptorSet in = VK_NULL_HANDLE; @@ -595,6 +611,8 @@ void Renderer::Render(uint32_t index, uint64_t waitValue) m_pipelines[i]->Render(in, out, rect); } + vkCmdWriteTimestamp(m_commandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_queryPool, 1); + VK_CHECK(vkEndCommandBuffer(m_commandBuffer)); VkTimelineSemaphoreSubmitInfo timelineInfo = {}; @@ -623,6 +641,24 @@ void Renderer::Render(uint32_t index, uint64_t waitValue) } } +Renderer::Timestamps Renderer::GetTimestamps() +{ + uint64_t queries[2]; + VK_CHECK(vkGetQueryPoolResults(m_dev, m_queryPool, 0, 2, 2 * sizeof(uint64_t), queries, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT)); + queries[0] *= m_timestampPeriod; + queries[1] *= m_timestampPeriod; + + VkCalibratedTimestampInfoEXT timestampInfo = {}; + timestampInfo.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + timestampInfo.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; + uint64_t deviation; + uint64_t timestamp; + VK_CHECK(d.vkGetCalibratedTimestampsEXT(m_dev, 1, ×tampInfo, ×tamp, &deviation)); + timestamp *= m_timestampPeriod; + + return {timestamp, queries[0], queries[1]}; +} + void Renderer::Wait(uint32_t index, uint64_t waitValue) { VkSemaphoreWaitInfo waitInfo = {}; diff --git a/alvr/server/cpp/platform/linux/Renderer.h b/alvr/server/cpp/platform/linux/Renderer.h index b6b4fb8a96..b5739f6bdc 100644 --- a/alvr/server/cpp/platform/linux/Renderer.h +++ b/alvr/server/cpp/platform/linux/Renderer.h @@ -41,6 +41,12 @@ class Renderer DrmImage drm; }; + struct Timestamps { + uint64_t now; + uint64_t renderBegin; + uint64_t renderComplete; + }; + explicit Renderer(const VkInstance &inst, const VkDevice &dev, const VkPhysicalDevice &physDev, uint32_t graphicsIdx, uint32_t computeIdx, const std::vector &devExtensions); virtual ~Renderer(); @@ -54,6 +60,8 @@ class Renderer void Render(uint32_t index, uint64_t waitValue); + Timestamps GetTimestamps(); + void CopyOutput(VkImage image, VkFormat format, VkImageLayout layout, VkSemaphore *semaphore = nullptr, VkFence *fence = nullptr); void Wait(uint32_t index, uint64_t waitValue); @@ -89,6 +97,7 @@ class Renderer PFN_vkImportSemaphoreFdKHR vkImportSemaphoreFdKHR; PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR; PFN_vkGetImageDrmFormatModifierPropertiesEXT vkGetImageDrmFormatModifierPropertiesEXT; + PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT; bool haveDmaBuf = false; bool haveDrmModifiers = false; } d; @@ -107,6 +116,7 @@ class Renderer uint32_t m_queueFamilyIndexCompute; VkFormat m_format; VkExtent2D m_imageSize; + VkQueryPool m_queryPool; VkCommandPool m_commandPool; VkSampler m_sampler; VkBuffer m_vertexBuffer; @@ -116,6 +126,7 @@ class Renderer VkDescriptorSetLayout m_descriptorLayout; VkCommandBuffer m_commandBuffer; VkFence m_fence; + double m_timestampPeriod; std::string m_inputImageCapture; std::string m_outputImageCapture; diff --git a/alvr/server/cpp/platform/linux/ffmpeg_helper.cpp b/alvr/server/cpp/platform/linux/ffmpeg_helper.cpp index 0a54a31e79..0275a1e892 100644 --- a/alvr/server/cpp/platform/linux/ffmpeg_helper.cpp +++ b/alvr/server/cpp/platform/linux/ffmpeg_helper.cpp @@ -52,7 +52,8 @@ alvr::VkContext::VkContext(const char *deviceName, const std::vector