From 3cc2fe64269c1bc4706b4a475c9e75e4020b2a9c Mon Sep 17 00:00:00 2001
From: nift4 <nift4@protonmail.com>
Date: Mon, 20 Apr 2026 18:44:17 +0200
Subject: [PATCH] Enable accurate mime type assignment for DTS-HD in TS

Read more of the DTS-HD header in order to find out the extension
substream type and assign the correct MIME type, which is relevant for
the buffer size decision logic (as DTS Express has a much lower maximum
bit rate than DTS-HD).

Issue: #2487
Issue: #3147
---
 .../androidx/media3/extractor/DtsUtil.java    | 176 +++++++++++++++++-
 .../ts/sample_dts_hd_ma.ts.0.dump             |   2 +-
 .../ts/sample_dts_hd_ma.ts.1.dump             |   2 +-
 .../ts/sample_dts_hd_ma.ts.2.dump             |   2 +-
 .../ts/sample_dts_hd_ma.ts.3.dump             |   2 +-
 .../sample_dts_hd_ma.ts.unknown_length.dump   |   2 +-
 6 files changed, 176 insertions(+), 10 deletions(-)

diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java b/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java
index bed79220563..467148f7419 100644
--- a/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java
+++ b/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java
@@ -429,6 +429,8 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException {
     int assetsCount; // nuNumAssets
     int referenceClockCode; // nuRefClockCode
     int extensionSubstreamFrameDurationCode; // nuExSSFrameDurationCode
+    boolean enableMixMetadata = false; // bMixMetadataEnbl
+    int[] mixerOutChannels = null;
 
     boolean staticFieldsPresent = headerBits.readBit(); // bStaticFieldsPresent
     if (staticFieldsPresent) {
@@ -456,13 +458,16 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException {
         }
       }
 
-      if (headerBits.readBit()) { // bMixMetadataEnbl
+      enableMixMetadata = headerBits.readBit();
+      if (enableMixMetadata) { // bMixMetadataEnbl
         headerBits.skipBits(2); // nuMixMetadataAdjLevel
         int mixerOutputMaskBits = (headerBits.readBits(2) + 1) << 2; // nuBits4MixOutMask
         int mixerOutputConfigurationCount = headerBits.readBits(2) + 1; // nuNumMixOutConfigs
+        mixerOutChannels = new int[mixerOutputConfigurationCount];
         // Output Mixing Configuration Loop
         for (int i = 0; i < mixerOutputConfigurationCount; i++) {
-          headerBits.skipBits(mixerOutputMaskBits); // nuMixOutChMask
+          int mask = headerBits.readBits(mixerOutputMaskBits); // nuMixOutChMask
+          mixerOutChannels[i] = getRemapChannelCount(mask);
         }
       }
     } else {
@@ -476,8 +481,11 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException {
     headerBits.skipBits(extensionSubstreamFrameSizeBits); // nuAssetFsize
     int sampleRate = C.RATE_UNSET_INT;
     int channelCount = C.LENGTH_UNSET; // nuTotalNumChs
+    boolean embeddedStereo = false; // bEmbeddedStereoFlag
+    boolean embedded6ch = false; // bEmbeddedSixChFlag
 
-    // Asset descriptor, see ETSI TS 102 114 V1.6.1 (2019-08) Table 7-5.
+    // Asset descriptor: Size, Index and Per Stream Static Metadata, see ETSI TS 102 114 V1.6.1
+    // (2019-08) Table 7-5.
     headerBits.skipBits(9 + 3); // nuAssetDescriptFsize, nuAssetIndex
     if (staticFieldsPresent) {
       if (headerBits.readBit()) { // bAssetTypeDescrPresent
@@ -493,9 +501,113 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException {
       headerBits.skipBits(5); // nuBitResolution
       sampleRate = SAMPLE_RATE_BY_INDEX[headerBits.readBits(4)]; // nuMaxSampleRate
       channelCount = headerBits.readBits(8) + 1;
-      // Done reading necessary bits, ignoring the rest.
+      // Table 7-5: the speaker feed fields and nuRepresentationType are mutually exclusive, and
+      // both are only coded inside the bStaticFieldsPresent branch.
+      if (headerBits.readBit()) { // bOne2OneMapChannels2Speakers
+        // bEmbeddedStereoFlag needs more than 2 channels, bEmbeddedSixChFlag more than 6.
+        embeddedStereo = channelCount > 2 && headerBits.readBit(); // bEmbeddedStereoFlag
+        embedded6ch = channelCount > 6 && headerBits.readBit(); // bEmbeddedSixChFlag
+        int speakerMaskLength = 0;
+        if (headerBits.readBit()) { // bSpkrMaskEnabled
+          speakerMaskLength = (headerBits.readBits(2) + 1) << 2; // nuNumBits4SAMask
+          headerBits.skipBits(speakerMaskLength); // nuSpkrActivityMask
+        }
+        int speakerRemapSetsCount = headerBits.readBits(3); // nuNumSpkrRemapSets
+        int[] speakerRemapSets = new int[speakerRemapSetsCount];
+        for (int i = 0; i < speakerRemapSetsCount; i++) {
+          speakerRemapSets[i] = headerBits.readBits(speakerMaskLength); // nuStndrSpkrLayoutMask[ns]
+        }
+        for (int i = 0; i < speakerRemapSetsCount; i++) {
+          int remapChannelCount = getRemapChannelCount(speakerRemapSets[i]);
+          int remapMaskLength = headerBits.readBits(5) + 1; // nuNumDecCh4Remap[ns]
+          for (int j = 0; j < remapChannelCount; j++) {
+            int remapMask = headerBits.readBits(remapMaskLength); // nuRemapDecChMask[ns][nCh]
+            int coef = Integer.bitCount(remapMask); // nCoef
+            headerBits.skipBits(coef * 5); // nuSpkrRemapCodes[ns][nCh][nc]
+          }
+        }
+      } else {
+        // No speaker feed case: a 3-bit representation type is coded instead.
+        headerBits.skipBits(3); // nuRepresentationType
+      }
     }
 
+    // Asset descriptor: Dynamic Metadata - DRC, DNC and Mixing Metadata, see ETSI TS 102 114 V1.6.1
+    // (2019-08) Table 7-6.
+    boolean hasDrcCoef = headerBits.readBit();
+    if (hasDrcCoef) { // bDRCCoefPresent
+      headerBits.skipBits(8); // nuDRCCode
+    }
+    if (headerBits.readBit()) { // bDialNormPresent
+      headerBits.skipBits(5); // nuDialNormCode
+    }
+    if (hasDrcCoef && embeddedStereo) {
+      headerBits.skipBits(8); // nuDRC2ChDmixCode
+    }
+    if (enableMixMetadata && headerBits.readBit()) { // bMixMetadataPresent
+      headerBits.skipBits(1 + 6); // bExternalMixFlag, nuPostMixGainAdjCode
+      if (headerBits.readBits(2) < 3) { // nuControlMixerDRC
+        headerBits.skipBits(3); // nuLimit4EmbeddedDRC
+      } else {
+        headerBits.skipBits(8); // nuCustomDRCCode
+      }
+      boolean audioScalePerChannel = headerBits.readBit(); // bEnblPerChMainAudioScale
+      for (int mixerOutChannel : mixerOutChannels) {
+        if (audioScalePerChannel) {
+          headerBits.skipBits(6 * mixerOutChannel); // nuMainAudioScaleCode[ns][nCh]
+        } else {
+          headerBits.skipBits(6); // nuMainAudioScaleCode[ns][0]
+        }
+      }
+      int mixesCount = 1; // nEmDM
+      int[] channelCountsForDownmixes = new int[3];
+      channelCountsForDownmixes[0] = channelCount; // nDecCh[0]
+      if (embedded6ch) {
+        channelCountsForDownmixes[mixesCount] = 6; // nDecCh[nEmDM]
+        mixesCount++; // nEmDM
+      }
+      if (embeddedStereo) {
+        channelCountsForDownmixes[mixesCount] = 2; // nDecCh[nEmDM]
+        mixesCount++; // nEmDM
+      }
+      for (int mixerOutChannel : mixerOutChannels) {
+        for (int downmix = 0; downmix < mixesCount; downmix++) {
+          int channelCountForDownmix = channelCountsForDownmixes[downmix];
+          for (int downmixChannel = 0; downmixChannel < channelCountForDownmix; downmixChannel++) {
+            int mask = headerBits.readBits(mixerOutChannel); // nuMixMapMask[ns][nE][nCh]
+            int coefficients = Integer.bitCount(mask); // nuNumMixCoefs[ns][nE][nCh]
+            headerBits.skipBits(coefficients * 6); // nuMixCoeffs[ns][nE][nCh][nC]
+          }
+        }
+      }
+    }
+
+    // Asset descriptor: Decoder Navigation Data, see ETSI TS 102 114 V1.6.1 (2019-08) Table 7-7.
+    int codingMode = headerBits.readBits(2); // nuCodingMode
+    String mimeType;
+    switch (codingMode) {
+      case 0: // DTS-HD Coding Mode that may contain multiple coding components
+        int extensionMask = headerBits.readBits(12);
+        if ((extensionMask & 0x100) != 0) { // Low bit rate component
+          mimeType = MimeTypes.AUDIO_DTS_EXPRESS;
+        } else {
+          mimeType = MimeTypes.AUDIO_DTS_HD;
+        }
+        break;
+      case 1: // DTS-HD Loss-less coding mode without CBR component
+        mimeType = MimeTypes.AUDIO_DTS_HD;
+        break;
+      case 2: // DTS-HD Low bit-rate mode
+        mimeType = MimeTypes.AUDIO_DTS_EXPRESS;
+        break;
+      case 3: // The auxiliary coding mode is reserved for future applications.
+      default:
+        throw ParserException.createForMalformedContainer(
+            /* message= */ "Unsupported coding mode in DTS HD header: " + codingMode,
+            /* cause= */ null);
+    }
+    // Done reading necessary bits, ignoring the rest.
+
     long frameDurationUs = C.TIME_UNSET;
     if (staticFieldsPresent) {
       int referenceClockFrequency;
@@ -521,7 +633,7 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException {
               extensionSubstreamFrameDurationCode, C.MICROS_PER_SECOND, referenceClockFrequency);
     }
     return new DtsHeader(
-        MimeTypes.AUDIO_DTS_EXPRESS,
+        mimeType,
         channelCount,
         sampleRate,
         extensionSubstreamFrameSize,
@@ -529,6 +641,60 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException {
         /* bitrate= */ 0);
   }
 
+  /**
+   * Returns the number of loudspeakers selected by a loudspeaker mask.
+   *
+   * <p>The bit assignments follow Table 7-10 in ETSI TS 102 114 V1.6.1. Only the 16 least
+   * significant bits of the mask are inspected; any higher bits are ignored.
+   *
+   * <p>Each set bit contributes either one channel (a single loudspeaker) or two channels (a
+   * left/right loudspeaker pair), and the result is the sum of these contributions.
+   *
+   * @param mask The loudspeaker mask.
+   * @return The number of loudspeakers selected by {@code mask}.
+   */
+  private static int getRemapChannelCount(int mask) {
+    int speakerCount = 0;
+    // Table 7-10 assigns one loudspeaker, or one left/right pair of loudspeakers, to each of the
+    // 16 mask bits from 0x0001 up to 0x8000, so test each of those bits in turn.
+    for (int bit = 0x0001; bit <= 0x8000; bit <<= 1) {
+      if ((mask & bit) == 0) {
+        // This loudspeaker (or loudspeaker pair) is not selected by the mask.
+        continue;
+      }
+      switch (bit) {
+        // Bits that denote a single loudspeaker.
+        case 0x0001: // Centre in front of listener
+        case 0x0008: // Low frequency effects subwoofer
+        case 0x0010: // Centre surround in rear
+        case 0x0080: // Centre Height in front
+        case 0x0100: // Over the listener's head
+        case 0x1000: // Second low frequency effects subwoofer
+        case 0x4000: // Centre height in rear
+          speakerCount += 1;
+          break;
+        // Bits that denote a left/right pair of loudspeakers.
+        case 0x0002: // Left/Right in front
+        case 0x0004: // Left/Right surround on side in rear
+        case 0x0020: // Left/Right height in front
+        case 0x0040: // Left/Right surround in rear
+        case 0x0200: // Between left/right and centre in front
+        case 0x0400: // Left/Right on side in front
+        case 0x0800: // Left/Right surround on side
+        case 0x2000: // Left/Right height on side
+        case 0x8000: // Left/Right height in rear
+          speakerCount += 2;
+          break;
+        default:
+          // Not reachable, as every bit visited by the loop is listed above.
+          break;
+      }
+    }
+    return speakerCount;
+  }
+
   /**
    * Returns the size of the extension substream header in a DTS-HD frame according to ETSI TS 102
    * 114 V1.6.1 (2019-08), Section 7.5.2.
diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump
index 2e70e07f75e..61ba38d8c47 100644
--- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump
+++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump
@@ -13,7 +13,7 @@ track 256:
     averageBitrate = 1536000
     id = 1/256
     containerMimeType = video/mp2t
-    sampleMimeType = audio/vnd.dts.hd;profile=lbr
+    sampleMimeType = audio/vnd.dts.hd
     channelCount = 8
     sampleRate = 48000
     language = en
diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump
index 2dc144fda79..2aa6ee986bd 100644
--- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump
+++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump
@@ -13,7 +13,7 @@ track 256:
     averageBitrate = 1536000
     id = 1/256
     containerMimeType = video/mp2t
-    sampleMimeType = audio/vnd.dts.hd;profile=lbr
+    sampleMimeType = audio/vnd.dts.hd
     channelCount = 8
     sampleRate = 48000
     language = en
diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump
index 690fa210839..c63c1833def 100644
--- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump
+++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump
@@ -13,7 +13,7 @@ track 256:
     averageBitrate = 1536000
     id = 1/256
     containerMimeType = video/mp2t
-    sampleMimeType = audio/vnd.dts.hd;profile=lbr
+    sampleMimeType = audio/vnd.dts.hd
     channelCount = 8
     sampleRate = 48000
     language = en
diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump
index 2ffcfa14c65..fc1aed4d522 100644
--- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump
+++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump
@@ -13,7 +13,7 @@ track 256:
     averageBitrate = 1536000
     id = 1/256
     containerMimeType = video/mp2t
-    sampleMimeType = audio/vnd.dts.hd;profile=lbr
+    sampleMimeType = audio/vnd.dts.hd
     channelCount = 8
     sampleRate = 48000
     language = en
diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump
index bbeb66f93b1..554573488ff 100644
--- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump
+++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump
@@ -10,7 +10,7 @@ track 256:
     averageBitrate = 1536000
     id = 1/256
     containerMimeType = video/mp2t
-    sampleMimeType = audio/vnd.dts.hd;profile=lbr
+    sampleMimeType = audio/vnd.dts.hd
     channelCount = 8
     sampleRate = 48000
     language = en