From 0b33d20fce0d28a930a40bacfee3a5e8fb0bce10 Mon Sep 17 00:00:00 2001 From: Alicia Boya Garcia Date: Wed, 26 Oct 2022 09:27:48 -0700 Subject: [PATCH] [MSE][GStreamer] Add documentation comments to createOptionalParserForFormat() Reviewed by Philippe Normand. This is a documentation patch. No changes in behavior. There was no explanation on why we used parsers at all, and this caused confusion in at least one issue [1], so I added code to document it. [1] https://github.com/WebPlatformForEmbedded/WPEWebKit/pull/948 * Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp: (WebCore::createOptionalParserForFormat): Canonical link: https://commits.webkit.org/256020@main --- .../graphics/gstreamer/mse/AppendPipeline.cpp | 51 +++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp b/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp index 1baa7f23a7fcf..bab1797829bb8 100644 --- a/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp +++ b/Source/WebCore/platform/graphics/gstreamer/mse/AppendPipeline.cpp @@ -668,24 +668,69 @@ void AppendPipeline::handleAppsinkNewSampleFromStreamingThread(GstElement*) static GRefPtr createOptionalParserForFormat(const AtomString& trackId, const GstCaps* caps) { + // Parser elements have either or both of two functions: + // + // a) Framing: Several popular formats (notably MPEG Audio) can be used without a container. + // MSE supports such formats when operating in "sequence" mode. When using these formats, + // the parser is an essential element, as it receives buffers of arbitrary byte sizes + // and identifies where each frame starts and ends, splitting them into separate GstBuffer + // objects, and reassembling frames that were split between two appends. + // + // b) Metadata filling: Even when framing is taken care of by a container, sometimes there is + // important metadata missing. 
This may be the case because the container format does not + // require such metadata, or it may be because of broken files. Either way, parsers allow + // us to recover potentially missing metadata from the binary contents of audio or video + // frames. + // + // NOTE: Please add and keep comments updated with the rationale for each parser. + GstStructure* structure = gst_caps_get_structure(caps, 0); const char* mediaType = gst_structure_get_name(structure); auto parserName = makeString(trackId, "_parser"_s); + // Since parsers are not needed in every case, we can use an identity element as pass-through + // parser for cases where a parser is not needed, making the management of elements and pads + // more orthogonal. const char* elementClass = "identity"; - if (!g_strcmp0(mediaType, "audio/x-opus")) + if (!g_strcmp0(mediaType, "audio/x-opus")) { + // Necessary for: metadata filling. + // Frame durations are optional in Matroska/WebM. Although frame durations are not required + // for regular playback, they're necessary for MSE, especially handling replacement of frames + // during quality changes. + // An example of an Opus audio file lacking durations is car_opus_low.webm + // https://storage.googleapis.com/ytlr-cert.appspot.com/test/materials/media/car_opus_low.webm elementClass = "opusparse"; - else if (!g_strcmp0(mediaType, "video/x-h264")) + } else if (!g_strcmp0(mediaType, "video/x-h264")) { + // Necessary for: metadata filling. + // Some dubiously muxed content lacks the bit specifying what frames are key frames or not. + // Without this bit, seeks will more often than not cause corrupted output in the decoder, + // as the browser will be unaware of any dependencies of those frames and they won't be fed + // to the decoder. 
+ // An example of such a stream: http://orange-opensource.github.io/hasplayer.js/1.2.0/player.html?url=http://playready.directtaps.net/smoothstreaming/SSWSS720H264/SuperSpeedway_720.ism/Manifest elementClass = "h264parse"; - else if (!g_strcmp0(mediaType, "audio/mpeg")) { + } else if (!g_strcmp0(mediaType, "audio/mpeg")) { + // Necessary for: framing. + // The Media Source Extensions Byte Stream Format Registry includes MPEG Audio Byte Stream Format + // as the (as of writing) only one spec-defined format that has the "Generate Timestamps Flag" set + // to false, i.e. is used without a demuxer, in "sequence" mode. + // We need a parser to take care of extracting the frames from the byte stream. int mpegversion = 0; gst_structure_get_int(structure, "mpegversion", &mpegversion); switch (mpegversion) { case 1: + // MPEG-1 Part 3 Audio (ISO 11172-3) Layer I -- MP1, archaic + // MPEG-1 Part 3 Audio (ISO 11172-3) Layer II -- MP2, common in audio broadcasting, e.g. DVB + // MPEG-1 Part 3 Audio (ISO 11172-3) Layer III -- MP3, the only one of the three most people actually know elementClass = "mpegaudioparse"; break; case 2: + // MPEG-2 Part 7 Advanced Audio Coding (ISO 13818-7) -- MPEG-2 AAC, the original AAC format, widely used, + // has extensions retrofitted. case 4: + // MPEG-4 Part 3 Audio (ISO 14496-3) -- MPEG-4 Audio, which more often than not also contains AAC audio, + // defines several extensions to the original AAC, also widely used. + // Not to be confused with the MP4 file format, which is a container format, not an audio stream format, + // and can incidentally contain MPEG-4 audio. elementClass = "aacparse"; break; default: