From 3cc2fe64269c1bc4706b4a475c9e75e4020b2a9c Mon Sep 17 00:00:00 2001 From: nift4 Date: Mon, 20 Apr 2026 18:44:17 +0200 Subject: [PATCH] Enable accurate mime type assignment for DTS-HD in TS Read more of the DTS-HD header in order to find out extension substream type, to get correct mime type which is relevant for buffer size decision logic (as DTS Express has way lower maximum bit rate than DTS-HD). Issue: #2487 Issue: #3147 --- .../androidx/media3/extractor/DtsUtil.java | 176 +++++++++++++++++- .../ts/sample_dts_hd_ma.ts.0.dump | 2 +- .../ts/sample_dts_hd_ma.ts.1.dump | 2 +- .../ts/sample_dts_hd_ma.ts.2.dump | 2 +- .../ts/sample_dts_hd_ma.ts.3.dump | 2 +- .../sample_dts_hd_ma.ts.unknown_length.dump | 2 +- 6 files changed, 176 insertions(+), 10 deletions(-) diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java b/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java index bed79220563..467148f7419 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java @@ -429,6 +429,8 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { int assetsCount; // nuNumAssets int referenceClockCode; // nuRefClockCode int extensionSubstreamFrameDurationCode; // nuExSSFrameDurationCode + boolean enableMixMetadata = false; // bMixMetadataEnbl + int[] mixerOutChannels = null; boolean staticFieldsPresent = headerBits.readBit(); // bStaticFieldsPresent if (staticFieldsPresent) { @@ -456,13 +458,16 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { } } - if (headerBits.readBit()) { // bMixMetadataEnbl + enableMixMetadata = headerBits.readBit(); + if (enableMixMetadata) { // bMixMetadataEnbl headerBits.skipBits(2); // nuMixMetadataAdjLevel int mixerOutputMaskBits = (headerBits.readBits(2) + 1) << 2; // nuBits4MixOutMask int mixerOutputConfigurationCount = 
headerBits.readBits(2) + 1; // nuNumMixOutConfigs + mixerOutChannels = new int[mixerOutputConfigurationCount]; // Output Mixing Configuration Loop for (int i = 0; i < mixerOutputConfigurationCount; i++) { - headerBits.skipBits(mixerOutputMaskBits); // nuMixOutChMask + int mask = headerBits.readBits(mixerOutputMaskBits); // nuMixOutChMask + mixerOutChannels[i] = getRemapChannelCount(mask); } } } else { @@ -476,8 +481,11 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { headerBits.skipBits(extensionSubstreamFrameSizeBits); // nuAssetFsize int sampleRate = C.RATE_UNSET_INT; int channelCount = C.LENGTH_UNSET; // nuTotalNumChs + boolean embeddedStereo = false; // bEmbeddedStereoFlag + boolean embedded6ch = false; // bEmbeddedSixChFlag - // Asset descriptor, see ETSI TS 102 114 V1.6.1 (2019-08) Table 7-5. + // Asset descriptor: Size, Index and Per Stream Static Metadata, see ETSI TS 102 114 V1.6.1 + // (2019-08) Table 7-5. headerBits.skipBits(9 + 3); // nuAssetDescriptFsize, nuAssetIndex if (staticFieldsPresent) { if (headerBits.readBit()) { // bAssetTypeDescrPresent @@ -493,9 +501,113 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { headerBits.skipBits(5); // nuBitResolution sampleRate = SAMPLE_RATE_BY_INDEX[headerBits.readBits(4)]; // nuMaxSampleRate channelCount = headerBits.readBits(8) + 1; - // Done reading necessary bits, ignoring the rest. 
+      if (headerBits.readBit()) { // bOne2OneMapChannels2Speakers
+        if (channelCount > 2) {
+          embeddedStereo = headerBits.readBit(); // bEmbeddedStereoFlag
+        }
+        if (channelCount > 6) {
+          embedded6ch = headerBits.readBit(); // bEmbeddedSixChFlag
+        }
+        int speakerMaskLength = 0; // Stays 0 unless bSpkrMaskEnabled is set.
+        if (headerBits.readBit()) { // bSpkrMaskEnabled
+          speakerMaskLength = (headerBits.readBits(2) + 1) << 2; // nuNumBits4SAMask
+          headerBits.skipBits(speakerMaskLength); // nuSpkrActivityMask
+        }
+        int speakerRemapSetsCount = headerBits.readBits(3); // nuNumSpkrRemapSets
+        int[] speakerRemapSets = new int[speakerRemapSetsCount];
+        for (int i = 0; i < speakerRemapSetsCount; i++) {
+          speakerRemapSets[i] = headerBits.readBits(speakerMaskLength); // nuStndrSpkrLayoutMask[ns]
+        }
+        for (int i = 0; i < speakerRemapSetsCount; i++) {
+          int remapChannelCount = getRemapChannelCount(speakerRemapSets[i]);
+          int remapMaskLength = headerBits.readBits(5) + 1; // nuNumDecCh4Remap[ns]
+          for (int j = 0; j < remapChannelCount; j++) {
+            int remapMask = headerBits.readBits(remapMaskLength); // nuRemapDecChMask[ns][nCh]
+            int coef = Integer.bitCount(remapMask); // nCoef
+            headerBits.skipBits(coef * 5); // nuSpkrRemapCodes[ns][nCh][nc]
+          }
+        }
+      } else { // bOne2OneMapChannels2Speakers not set; still inside the static fields block.
+        headerBits.skipBits(3); // nuRepresentationType
+      }
     }
 
+    // Asset descriptor: Dynamic Metadata - DRC, DNC and Mixing Metadata, see ETSI TS 102 114 V1.6.1
+    // (2019-08) Table 7-6.
+ boolean hasDrcCoef = headerBits.readBit(); + if (hasDrcCoef) { // bDRCCoefPresent + headerBits.skipBits(8); // nuDRCCode + } + if (headerBits.readBit()) { // bDialNormPresent + headerBits.skipBits(5); // nuDialNormCode + } + if (hasDrcCoef && embeddedStereo) { + headerBits.skipBits(8); // nuDRC2ChDmixCode + } + if (enableMixMetadata && headerBits.readBit()) { // bMixMetadataPresent + headerBits.skipBits(1 + 6); // bExternalMixFlag, nuPostMixGainAdjCode + if (headerBits.readBits(2) < 3) { // nuControlMixerDRC + headerBits.skipBits(3); // nuLimit4EmbeddedDRC + } else { + headerBits.skipBits(8); // nuCustomDRCCode + } + boolean audioScalePerChannel = headerBits.readBit(); // bEnblPerChMainAudioScale + for (int mixerOutChannel : mixerOutChannels) { + if (audioScalePerChannel) { + headerBits.skipBits(6 * mixerOutChannel); // nuMainAudioScaleCode[ns][nCh] + } else { + headerBits.skipBits(6); // nuMainAudioScaleCode[ns][0] + } + } + int mixesCount = 1; // nEmDM + int[] channelCountsForDownmixes = new int[3]; + channelCountsForDownmixes[0] = channelCount; // nDecCh[0] + if (embedded6ch) { + channelCountsForDownmixes[mixesCount] = 6; // nDecCh[nEmDM] + mixesCount++; // nEmDM + } + if (embeddedStereo) { + channelCountsForDownmixes[mixesCount] = 2; // nDecCh[nEmDM] + mixesCount++; // nEmDM + } + for (int mixerOutChannel : mixerOutChannels) { + for (int downmix = 0; downmix < mixesCount; downmix++) { + int channelCountForDownmix = channelCountsForDownmixes[downmix]; + for (int downmixChannel = 0; downmixChannel < channelCountForDownmix; downmixChannel++) { + int mask = headerBits.readBits(mixerOutChannel); // nuMixMapMask[ns][nE][nCh] + int coefficients = Integer.bitCount(mask); // nuNumMixCoefs[ns][nE][nCh] + headerBits.skipBits(coefficients * 6); // nuMixCoeffs[ns][nE][nCh][nC] + } + } + } + } + + // Asset descriptor: Decoder Navigation Data, see ETSI TS 102 114 V1.6.1 (2019-08) Table 7-7. 
+    int codingMode = headerBits.readBits(2); // nuCodingMode
+    String mimeType;
+    switch (codingMode) {
+      case 0: // DTS-HD Coding Mode that may contain multiple coding components
+        int extensionMask = headerBits.readBits(12);
+        if ((extensionMask & 0x100) != 0) { // Low bit rate component
+          mimeType = MimeTypes.AUDIO_DTS_EXPRESS;
+        } else {
+          mimeType = MimeTypes.AUDIO_DTS_HD;
+        }
+        break;
+      case 1: // DTS-HD Loss-less coding mode without CBR component
+        mimeType = MimeTypes.AUDIO_DTS_HD;
+        break;
+      case 2: // DTS-HD Low bit-rate mode
+        mimeType = MimeTypes.AUDIO_DTS_EXPRESS;
+        break;
+      case 3: // The auxiliary coding mode is reserved for future applications.
+      default:
+        throw ParserException.createForMalformedContainer(
+            /* message= */ "Unsupported coding mode in DTS HD header: " + codingMode,
+            /* cause= */ null);
+    }
+    // Done reading necessary bits, ignoring the rest.
+
     long frameDurationUs = C.TIME_UNSET;
     if (staticFieldsPresent) {
       int referenceClockFrequency;
@@ -521,7 +633,7 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException {
               extensionSubstreamFrameDurationCode, C.MICROS_PER_SECOND, referenceClockFrequency);
     }
     return new DtsHeader(
-        MimeTypes.AUDIO_DTS_EXPRESS,
+        mimeType,
         channelCount,
         sampleRate,
         extensionSubstreamFrameSize,
@@ -529,6 +641,30 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException {
         /* bitrate= */ 0);
   }
 
+  /**
+   * Returns the number of channels described by a loudspeaker mask, see ETSI TS 102 114 V1.6.1
+   * (2019-08) Table 7-10.
+   *
+   * <p>Only the 16 least significant bits of {@code mask} are evaluated. Every set bit stands for
+   * either a single loudspeaker (one channel) or a left/right loudspeaker pair (two channels).
+   *
+   * @param mask The loudspeaker mask.
+   * @return The number of channels signalled by {@code mask}.
+   */
+  private static int getRemapChannelCount(int mask) {
+    // Bits that stand for a single loudspeaker: 0x0001 centre in front of listener, 0x0008 low
+    // frequency effects subwoofer, 0x0010 centre surround in rear, 0x0080 centre height in front,
+    // 0x0100 over the listener's head, 0x1000 second low frequency effects subwoofer, 0x4000
+    // centre height in rear.
+    // Bits that stand for a left/right pair: 0x0002 in front, 0x0004 surround on side in rear,
+    // 0x0020 height in front, 0x0040 surround in rear, 0x0200 between left/right and centre in
+    // front, 0x0400 on side in front, 0x0800 surround on side, 0x2000 height on side, 0x8000
+    // height in rear.
+    int pairBits = 0xAE66;
+    // Count every set bit once, then count the pair bits a second time.
+    return Integer.bitCount(mask & 0xFFFF) + Integer.bitCount(mask & pairBits);
+  }
+
   /**
    * Returns the size of the extension substream header in a DTS-HD frame according to ETSI TS 102
    * 114 V1.6.1 (2019-08), Section 7.5.2.
diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump index 2e70e07f75e..61ba38d8c47 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump @@ -13,7 +13,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump index 2dc144fda79..2aa6ee986bd 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump @@ -13,7 +13,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump index 690fa210839..c63c1833def 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump @@ -13,7 +13,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump 
b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump index 2ffcfa14c65..fc1aed4d522 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump @@ -13,7 +13,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump index bbeb66f93b1..554573488ff 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump @@ -10,7 +10,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en