alvr-org · Vixea · Jan 28, 2023 · Jan 22, 2023 · Jan 25, 2023 · Jan 25, 2023
diff --git a/alvr/server/cpp/alvr_server/ClientConnection.cpp b/alvr/server/cpp/alvr_server/ClientConnection.cpp
@@ -8,65 +8,95 @@
 #include "Utils.h"
 #include "Settings.h"
 
-static const uint8_t NAL_TYPE_SPS = 7;
+static const uint8_t H264_NAL_TYPE_SPS = 7;
 static const uint8_t H265_NAL_TYPE_VPS = 32;
 
-ClientConnection::ClientConnection() {
-	m_Statistics = std::make_shared<Statistics>();
+// NAL unit types of the access unit delimiter (AUD) that may precede the
+// configuration NALs: 9 in H.264, 35 in H.265.
+static const uint8_t H264_NAL_TYPE_AUD = 9;
+static const uint8_t H265_NAL_TYPE_AUD = 35;
+
+// Allocates the Statistics instance held in m_Statistics.
+ClientConnection::ClientConnection() {
+	m_Statistics = std::make_shared<Statistics>();
 }
 
-int findVPSSPS(const uint8_t *frameBuffer, int frameByteSize) {
-    int zeroes = 0;
-    int foundNals = 0;
-    for (int i = 0; i < frameByteSize; i++) {
-        if (frameBuffer[i] == 0) {
-            zeroes++;
-        } else if (frameBuffer[i] == 1) {
-            if (zeroes >= 2) {
-                foundNals++;
-                if (Settings::Instance().m_codec == ALVR_CODEC_H264 && foundNals >= 3) {
-                    // Find end of SPS+PPS on H.264.
-                    return i - 3;
-                } else if (Settings::Instance().m_codec == ALVR_CODEC_H265 && foundNals >= 4) {
-                    // Find end of VPS+SPS+PPS on H.264.
-                    return i - 3;
-                }
-            }
-            zeroes = 0;
-        } else {
-            zeroes = 0;
-        }
-    }
-    return -1;
+/*
+	Extracts the (VPS + )SPS + PPS video configuration headers from an H.264 or H.265
+	stream and passes them to InitializeDecoder() as one contiguous run of NALs.
+	(VPS + )SPS + PPS have short size (8bytes + 28bytes in some environment), so we can
+	assume SPS + PPS is contained in first fragment.
+
+	buf/len: in/out cursor over the frame. *buf must point at the start code of the first
+	         configuration NAL. On success *buf is moved to the start code of the NAL that
+	         follows the headers and *len is reduced by the number of bytes skipped.
+	nalNum:  number of configuration NALs to extract (2 for H.264, 3 for H.265).
+
+	If fewer than nalNum + 1 start codes are found, nothing is sent to the decoder and
+	*buf / *len are left untouched. Only 4-byte start codes (00 00 00 01) are recognized.
+*/
+void extractHeaders(uint8_t **buf, int *len, int nalNum) {
+	static const char NAL_HEADER[] = {0x00, 0x00, 0x00, 0x01};
+	static const int NAL_HEADER_LEN = sizeof(NAL_HEADER);
+
+	uint8_t *start = *buf;
+	int total = *len;
+
+	int headersLen = 0;
+	int foundHeaders = -1; // Offset by 1 header to find the length until the next header
+	// Stop as soon as a full start code no longer fits, so memcmp never reads past the
+	// end of the buffer and the offset cannot run away after skipping a start code.
+	while (headersLen + NAL_HEADER_LEN <= total) {
+		if (memcmp(start + headersLen, NAL_HEADER, NAL_HEADER_LEN) == 0) {
+			foundHeaders++;
+			if (foundHeaders == nalNum) {
+				break;
+			}
+			headersLen += NAL_HEADER_LEN;
+		}
+
+		headersLen++;
+	}
+	if (headersLen == 0 || foundHeaders != nalNum) {
+		return;
+	}
+	// The headers begin at the original cursor, not at the position the scan stopped at.
+	InitializeDecoder((const unsigned char *)start, headersLen);
+
+	// move the cursor forward excluding config NALs
+	*buf = start + headersLen;
+	*len = total - headersLen;
+}
+
+/*
+	Prepares an H.264 frame for sending: skips a leading access unit delimiter (AUD) NAL
+	if present and, when the next NAL is an SPS, hands SPS + PPS to the decoder through
+	extractHeaders() and advances past them.
+
+	buf/len: in/out cursor over the frame; updated to exclude the skipped NALs.
+
+	Assumes the frame starts with a 4-byte start code. A frame too short to carry the
+	NAL header byte that is inspected is not read beyond its end.
+*/
+void processH264Nals(uint8_t **buf, int *len) {
+	static const int NAL_TYPE_OFFSET = 4; // NAL header byte follows the 4-byte start code
+	static const int AUD_SIZE = 6;        // bytes skipped for an AUD, start code included
+
+	uint8_t *b = *buf;
+	int l = *len;
+	if (l <= NAL_TYPE_OFFSET) {
+		return;
+	}
+	uint8_t nalType = b[NAL_TYPE_OFFSET] & 0x1F;
+
+	if (nalType == H264_NAL_TYPE_AUD) {
+		if (l < AUD_SIZE) {
+			return; // truncated AUD: leave the frame untouched
+		}
+		b += AUD_SIZE;
+		l -= AUD_SIZE;
+		if (l <= NAL_TYPE_OFFSET) {
+			// Nothing readable after the AUD; just publish the skip.
+			*buf = b;
+			*len = l;
+			return;
+		}
+		nalType = b[NAL_TYPE_OFFSET] & 0x1F;
+	}
+	if (nalType == H264_NAL_TYPE_SPS) {
+		// Work on the local cursor so the AUD skip is honored and the advance made by
+		// extractHeaders() is not overwritten by the write-back below.
+		extractHeaders(&b, &l, 2); // 2 headers SPS and PPS
+	}
+	*buf = b;
+	*len = l;
+}
+
+/*
+	Prepares an H.265 frame for sending: skips a leading access unit delimiter (AUD) NAL
+	if present and, when the next NAL is a VPS, hands VPS + SPS + PPS to the decoder
+	through extractHeaders() and advances past them.
+
+	buf/len: in/out cursor over the frame; updated to exclude the skipped NALs.
+
+	Assumes the frame starts with a 4-byte start code. A frame too short to carry the
+	NAL header byte that is inspected is not read beyond its end.
+*/
+void processH265Nals(uint8_t **buf, int *len) {
+	static const int NAL_TYPE_OFFSET = 4; // NAL header starts after the 4-byte start code
+	static const int AUD_SIZE = 7;        // bytes skipped for an AUD, start code included
+
+	uint8_t *b = *buf;
+	int l = *len;
+	if (l <= NAL_TYPE_OFFSET) {
+		return;
+	}
+	uint8_t nalType = (b[NAL_TYPE_OFFSET] >> 1) & 0x3F;
+
+	if (nalType == H265_NAL_TYPE_AUD) {
+		if (l < AUD_SIZE) {
+			return; // truncated AUD: leave the frame untouched
+		}
+		b += AUD_SIZE;
+		l -= AUD_SIZE;
+		if (l <= NAL_TYPE_OFFSET) {
+			// Nothing readable after the AUD; just publish the skip.
+			*buf = b;
+			*len = l;
+			return;
+		}
+		nalType = (b[NAL_TYPE_OFFSET] >> 1) & 0x3F;
+	}
+	if (nalType == H265_NAL_TYPE_VPS) {
+		// Work on the local cursor so the AUD skip is honored and the advance made by
+		// extractHeaders() is not overwritten by the write-back below.
+		extractHeaders(&b, &l, 3); // 3 headers VPS, SPS and PPS
+	}
+	*buf = b;
+	*len = l;
 }
 
 void ClientConnection::SendVideo(uint8_t *buf, int len, uint64_t targetTimestampNs) {
 	// Report before the frame is packetized
 	ReportEncoded(targetTimestampNs);
 
-	uint8_t NALType;
-	if (Settings::Instance().m_codec == ALVR_CODEC_H264)
-		NALType = buf[4] & 0x1F;
-	else
-		NALType = (buf[4] >> 1) & 0x3F;
-
-	if ((Settings::Instance().m_codec == ALVR_CODEC_H264 && NALType == NAL_TYPE_SPS) ||
-		(Settings::Instance().m_codec == ALVR_CODEC_H265 && NALType == H265_NAL_TYPE_VPS)) {
-		// This frame contains (VPS + )SPS + PPS + IDR on NVENC H.264 (H.265) stream.
-		// (VPS + )SPS + PPS has short size (8bytes + 28bytes in some environment), so we can
-		// assume SPS + PPS is contained in first fragment.
-
-		int end = findVPSSPS(buf, len);
-		if (end == -1) {
-			// Invalid frame.
-			return;
-		}
-
-		InitializeDecoder((const unsigned char *)buf, end);
-
-		// move the cursor forward excluding config NALs
-		buf = &buf[end];
-		len = len - end;
+	int codec = Settings::Instance().m_codec;
+	if (codec == ALVR_CODEC_H264) {
+		processH264Nals(&buf, &len);
+	} else if (codec == ALVR_CODEC_H265) {
+		processH265Nals(&buf, &len);
 	}
 
 	VideoSend(targetTimestampNs, buf, len);

diff --git a/alvr/server/cpp/platform/linux/CEncoder.cpp b/alvr/server/cpp/platform/linux/CEncoder.cpp
@@ -224,7 +224,6 @@ void CEncoder::Run() {
 
       fprintf(stderr, "CEncoder starting to read present packets");
       present_packet frame_info;
-      std::vector<uint8_t> encoded_data;
       while (not m_exiting) {
         read_latest(client, (char *)&frame_info, sizeof(frame_info), m_exiting);
 
@@ -250,9 +249,8 @@ void CEncoder::Run() {
 
         static_assert(sizeof(frame_info.pose) == sizeof(vr::HmdMatrix34_t&));
 
-        encoded_data.clear();
-        uint64_t pts;
-        if (!encode_pipeline->GetEncoded(encoded_data, &pts)) {
+        alvr::FramePacket packet;
+        if (!encode_pipeline->GetEncoded(packet)) {
           Error("Failed to get encoded data!");
           continue;
         }
@@ -279,10 +277,11 @@ void CEncoder::Run() {
         ReportPresent(pose->targetTimestampNs, present_offset);
         ReportComposed(pose->targetTimestampNs, composed_offset);
 
-        m_listener->SendVideo(encoded_data.data(), encoded_data.size(), pts);
+        m_listener->SendVideo(packet.data, packet.size, packet.pts);
 
         m_listener->GetStatistics()->EncodeOutput();
 
+        encode_pipeline->Free();
       }
     }
     catch (std::exception &e) {

diff --git a/alvr/server/cpp/platform/linux/EncodePipeline.cpp b/alvr/server/cpp/platform/linux/EncodePipeline.cpp
@@ -12,59 +12,6 @@ extern "C" {
 #include <libavcodec/avcodec.h>
 }
 
-namespace {
-
-bool should_keep_nal_h264(const uint8_t * header_start)
-{
-  uint8_t nal_type = (header_start[2] == 0 ? header_start[4] : header_start[3]) & 0x1F;
-    switch (nal_type)
-    {
-      case 6: // supplemental enhancement information
-      case 9: // access unit delimiter
-        return false;
-      default:
-        return true;
-    }
-}
-
-bool should_keep_nal_h265(const uint8_t * header_start)
-{
-  uint8_t nal_type = ((header_start[2] == 0 ? header_start[4] : header_start[3]) >> 1) & 0x3F;
-  switch (nal_type)
-  {
-    case 35: // access unit delimiter
-    case 39: // supplemental enhancement information
-      return false;
-    default:
-      return true;
-  }
-}
-
-void filter_NAL(const uint8_t* input, size_t input_size, std::vector<uint8_t> &out)
-{
-  if (input_size < 4)
-    return;
-  auto codec = Settings::Instance().m_codec;
-  std::array<uint8_t, 3> header = {{0, 0, 1}};
-  auto end = input + input_size;
-  auto header_start = input;
-  while (header_start != end)
-  {
-    auto next_header = std::search(header_start + 3, end, header.begin(), header.end());
-    if (next_header != end and next_header[-1] == 0)
-    {
-      next_header--;
-    }
-    if (codec == ALVR_CODEC_H264 and should_keep_nal_h264(header_start))
-      out.insert(out.end(), header_start, next_header);
-    if (codec == ALVR_CODEC_H265 and should_keep_nal_h265(header_start))
-      out.insert(out.end(), header_start, next_header);
-    header_start = next_header;
-  }
-}
-
-}
-
 void alvr::EncodePipeline::SetBitrate(int64_t bitrate) {
   encoder_ctx->bit_rate = bitrate;
   encoder_ctx->rc_buffer_size = bitrate / Settings::Instance().m_refreshRate;
@@ -111,17 +58,24 @@ alvr::EncodePipeline::~EncodePipeline()
   avcodec_free_context(&encoder_ctx);
 }
 
-bool alvr::EncodePipeline::GetEncoded(std::vector<uint8_t> &out, uint64_t *pts)
+// Fetches the next encoded packet from the encoder without copying it.
+//
+// On success fills `packet` with a view (data / size / pts) into the AVPacket kept in
+// encoder_packet and returns true; the view stays valid until Free() or the next
+// GetEncoded() call. Returns false when the encoder has no output yet (EAGAIN).
+// Throws alvr::AvException on any other encoder error or if no packet can be allocated.
+bool alvr::EncodePipeline::GetEncoded(FramePacket &packet)
 {
-  AVPacket * enc_pkt = av_packet_alloc();
-  int err = avcodec_receive_packet(encoder_ctx, enc_pkt);
-  if (err == AVERROR(EAGAIN)) {
-    return false;
-  } else if (err) {
+  // Release a packet the caller never handed back through Free(); otherwise the
+  // assignment below would leak it. av_packet_free() is a no-op on a null pointer.
+  av_packet_free(&encoder_packet);
+  encoder_packet = av_packet_alloc();
+  if (encoder_packet == nullptr) {
+    throw alvr::AvException("failed to allocate packet", AVERROR(ENOMEM));
+  }
+  int err = avcodec_receive_packet(encoder_ctx, encoder_packet);
+  if (err != 0) {
+    av_packet_free(&encoder_packet);
+    if (err == AVERROR(EAGAIN)) {
+      return false;
+    }
     throw alvr::AvException("failed to encode", err);
   }
-  filter_NAL(enc_pkt->data, enc_pkt->size, out);
-  *pts = enc_pkt->pts;
-  av_packet_free(&enc_pkt);
+  packet.data = encoder_packet->data;
+  packet.size = encoder_packet->size;
+  packet.pts = encoder_packet->pts;
   return true;
 }
+
+// Releases the AVPacket obtained by the last successful GetEncoded() call. After this
+// the data pointer handed out through FramePacket must no longer be used.
+void alvr::EncodePipeline::Free()
+{
+  if (encoder_packet != nullptr) {
+    av_packet_free(&encoder_packet); // also resets encoder_packet to null
+  }
+}
diff --git a/alvr/server/cpp/platform/linux/EncodePipeline.h b/alvr/server/cpp/platform/linux/EncodePipeline.h
@@ -4,6 +4,7 @@
 #include <vector>
 
 extern "C" struct AVCodecContext;
+extern "C" struct AVPacket;
 
 class Renderer;
 
@@ -14,6 +15,12 @@ class VkFrame;
 class VkFrameCtx;
 class VkContext;
 
+// Non-owning view of one encoded frame as returned by EncodePipeline::GetEncoded().
+// The bytes stay owned by the pipeline that produced them; the view is only meant to be
+// used until the pipeline's Free() is called.
+struct FramePacket {
+  uint8_t *data = nullptr; // first byte of the encoded frame, nullptr when empty
+  int size = 0;            // number of bytes available at data
+  uint64_t pts = 0;        // timestamp forwarded to SendVideo() together with the frame
+};
+
 class EncodePipeline
 {
 public:
@@ -25,13 +32,15 @@ class EncodePipeline
   virtual ~EncodePipeline();
 
   virtual void PushFrame(uint64_t targetTimestampNs, bool idr) = 0;
-  virtual bool GetEncoded(std::vector<uint8_t> & out, uint64_t *pts);
+  virtual bool GetEncoded(FramePacket &data);
+  virtual void Free();
   virtual Timestamp GetTimestamp() { return timestamp; }
 
   virtual void SetBitrate(int64_t bitrate);
   static std::unique_ptr<EncodePipeline> Create(Renderer *render, VkContext &vk_ctx, VkFrame &input_frame, VkFrameCtx &vk_frame_ctx, uint32_t width, uint32_t height);
 protected:
   AVCodecContext *encoder_ctx = nullptr; //shall be initialized by child class
+  AVPacket *encoder_packet = NULL;
   Timestamp timestamp = {};
 };
 

diff --git a/alvr/server/cpp/platform/linux/EncodePipelineAMF.cpp b/alvr/server/cpp/platform/linux/EncodePipelineAMF.cpp
@@ -456,26 +456,24 @@ void EncodePipelineAMF::PushFrame(uint64_t targetTimestampNs, bool idr)
     m_amfComponents.front()->SubmitInput(surface);
 }
 
-bool EncodePipelineAMF::GetEncoded(std::vector<uint8_t> &out, uint64_t *pts)
+bool EncodePipelineAMF::GetEncoded(FramePacket &packet)
 {
     if (m_hasQueryTimeout) {
         m_pipeline->Run();
     } else {
         uint32_t timeout = 4 * 1000; // 1 second
-        while (m_outBuffer.empty() && --timeout != 0) {
+        while (m_framePacket.data == nullptr && --timeout != 0) {
             std::this_thread::sleep_for(std::chrono::microseconds(250));
             m_pipeline->Run();
         }
     }
 
-    if (m_outBuffer.empty()) {
+    if (m_framePacket.data == nullptr) {
         Error("Timed out waiting for encoder data");
         return false;
     }
 
-    out = m_outBuffer;
-    *pts = m_targetTimestampNs;
-    m_outBuffer.clear();
+    packet = m_framePacket;
 
     uint64_t query;
     VK_CHECK(vkGetQueryPoolResults(m_render->m_dev, m_queryPool, 0, 1, sizeof(uint64_t), &query, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT));
@@ -484,6 +482,10 @@ bool EncodePipelineAMF::GetEncoded(std::vector<uint8_t> &out, uint64_t *pts)
     return true;
 }
 
+// Marks the pending frame as consumed: clears m_framePacket so that the next
+// GetEncoded() call waits until Receive() stores a new frame.
+void EncodePipelineAMF::Free() {
+    m_framePacket.data = nullptr;
+    m_framePacket.size = 0;
+    m_framePacket.pts = 0;
+}
+
 void EncodePipelineAMF::SetBitrate(int64_t bitrate)
 {
     if (m_codec == ALVR_CODEC_H264) {
@@ -501,10 +503,9 @@ void EncodePipelineAMF::Receive(amf::AMFDataPtr data)
 {
     amf::AMFBufferPtr buffer(data); // query for buffer interface
 
-    char *p = reinterpret_cast<char*>(buffer->GetNative());
-    int length = static_cast<int>(buffer->GetSize());
-
-    m_outBuffer = std::vector<uint8_t>(p, p + length);
+    m_framePacket.data = reinterpret_cast<uint8_t *>(buffer->GetNative());
+    m_framePacket.size = static_cast<int>(buffer->GetSize());
+    m_framePacket.pts = m_targetTimestampNs;
 }
 
 void EncodePipelineAMF::ApplyFrameProperties(const amf::AMFSurfacePtr &surface, bool insertIDR)

diff --git a/alvr/server/cpp/platform/linux/EncodePipelineAMF.h b/alvr/server/cpp/platform/linux/EncodePipelineAMF.h
@@ -68,8 +68,9 @@ class EncodePipelineAMF : public EncodePipeline
     ~EncodePipelineAMF();
 
     void PushFrame(uint64_t targetTimestampNs, bool idr) override;
-    bool GetEncoded(std::vector<uint8_t> &out, uint64_t *pts) override;
+    bool GetEncoded(FramePacket &packet) override;
     void SetBitrate(int64_t bitrate) override;
+    void Free() override;
 
 private:
     amf::AMFComponentPtr MakeConverter(amf::AMF_SURFACE_FORMAT inputFormat, int width, int height, amf::AMF_SURFACE_FORMAT outputFormat);
@@ -96,7 +97,7 @@ class EncodePipelineAMF : public EncodePipeline
     int m_bitrateInMBits;
 
     bool m_hasQueryTimeout = false;
-    std::vector<uint8_t> m_outBuffer;
+    FramePacket m_framePacket = {nullptr, 0, 0};
     uint64_t m_targetTimestampNs;
 };