From 0b33d20fce0d28a930a40bacfee3a5e8fb0bce10 Mon Sep 17 00:00:00 2001 From: Alicia Boya Garcia Date: Wed, 26 Oct 2022 09:27:48 -0700 Subject: [PATCH] [MSE][GStreamer] Add documentation comments to createOptionalParserForFormat() Reviewed by Philippe Normand. This is a documentation patch. No changes in behavior. There was no explanation on why we used parsers at all, and this caused confusion in at least one issue [1], so I added code to document it. [1] https://github.com/WebPlatformForEmbedded/WPEWebKit/pull/948 * Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp: (WebCore::createOptionalParserForFormat): Canonical link: https://commits.webkit.org/256020@main --- .../graphics/gstreamer/mse/AppendPipeline.cpp | 51 +++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp b/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp index 1baa7f23a7fcf..bab1797829bb8 100644 --- a/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp +++ b/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp @@ -668,24 +668,69 @@ void AppendPipeline::handleAppsinkNewSampleFromStreamingThread(GstElement*) static GRefPtr createOptionalParserForFormat(const AtomString& trackId, const GstCaps* caps) { + // Parser elements have either or both of two functions: + // + // a) Framing: Several popular formats (notably MPEG Audio) can be used without a container. + // MSE supports such formats when operating in "sequence" mode. When using these formats, + // the parser is an essential element, as it receives buffers of arbitrary byte sizes + // and identifies where each frame starts and ends, splitting them into separate GstBuffer + // objects, and reassembling frames that were split between two appends. + // + // b) Metadata filling: Even when framing is taken care of by a container, sometimes there is + // important metadata missing. 
This may be the case because the container format does not + // require such metadata, or it may be because of broken files. Either way, parsers allow + // us to recover potentially missing metadata from the binary contents of audio or video + // frames. + // + // NOTE: Please add and keep comments updated with the rationale for each parser. + GstStructure* structure = gst_caps_get_structure(caps, 0); const char* mediaType = gst_structure_get_name(structure); auto parserName = makeString(trackId, "_parser"_s); + // Since parsers are not needed in every case, we can use an identity element as pass-through + // parser for cases where a parser is not needed, making the management of elements and pads + // more orthogonal. const char* elementClass = "identity"; - if (!g_strcmp0(mediaType, "audio/x-opus")) + if (!g_strcmp0(mediaType, "audio/x-opus")) { + // Necessary for: metadata filling. + // Frame durations are optional in Matroska/WebM. Although frame durations are not required + // for regular playback, they're necessary for MSE, especially handling replacement of frames + // during quality changes. + // An example of an Opus audio file lacking durations is car_opus_low.webm + // https://storage.googleapis.com/ytlr-cert.appspot.com/test/materials/media/car_opus_low.webm elementClass = "opusparse"; - else if (!g_strcmp0(mediaType, "video/x-h264")) + } else if (!g_strcmp0(mediaType, "video/x-h264")) { + // Necessary for: metadata filling. + // Some dubiously muxed content lacks the bit specifying what frames are key frames or not. + // Without this bit, seeks will more often than not cause corrupted output in the decoder, + // as the browser will be unaware of any dependencies of those frames and they won't be fed + // to the decoder. 
+ // An example of such a stream: http://orange-opensource.github.io/hasplayer.js/1.2.0/player.html?url=http://playready.directtaps.net/smoothstreaming/SSWSS720H264/SuperSpeedway_720.ism/Manifest elementClass = "h264parse"; - else if (!g_strcmp0(mediaType, "audio/mpeg")) { + } else if (!g_strcmp0(mediaType, "audio/mpeg")) { + // Necessary for: framing. + // The Media Source Extensions Byte Stream Format Registry includes MPEG Audio Byte Stream Format + // as the (as of writing) only one spec-defined format that has the "Generate Timestamps Flag" set + // to false, i.e. is used without a demuxer, in "sequence" mode. + // We need a parser to take care of extracting the frames from the byte stream. int mpegversion = 0; gst_structure_get_int(structure, "mpegversion", &mpegversion); switch (mpegversion) { case 1: + // MPEG-1 Part 3 Audio (ISO 11172-3) Layer I -- MP1, archaic + // MPEG-1 Part 3 Audio (ISO 11172-3) Layer II -- MP2, common in audio broadcasting, e.g. DVB + // MPEG-1 Part 3 Audio (ISO 11172-3) Layer III -- MP3, the only one of the three most people actually know elementClass = "mpegaudioparse"; break; case 2: + // MPEG-2 Part 7 Advanced Audio Coding (ISO 13818-7) -- MPEG-2 AAC, the original AAC format, widely used, + // has extensions retrofitted. case 4: + // MPEG-4 Part 3 Audio (ISO 14496-3) -- MPEG-4 Audio, which more often than not also contains AAC audio, + // defines several extensions to the original AAC, also widely used. + // Not to be confused with the MP4 file format, which is a container format, not an audio stream format, + // and can incidentally contain MPEG-4 audio. elementClass = "aacparse"; break; default: