Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@
`FLAG_READ_MFRA_FOR_SEEK_MAP` to the `FragmentedMp4Extractor`, which is
now done by default in `DefaultExtractorsFactory`
([#3088](https://github.com/androidx/media/issues/3088)).
* MP3: Use gapless-aware durations from Xing/Info headers
([#3183](https://github.com/androidx/media/issues/3183)).
* Ignore `av1C` data with unsupported version.
* MP4: Add support for big-endian floating point PCM in `fpcm` boxes.
* Matroska: Parse chapter info to `Chapter` entries in a track's
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import androidx.media3.common.C;
import androidx.media3.extractor.ConstantBitrateSeekMap;
import androidx.media3.extractor.MpegAudioUtil;
import androidx.media3.extractor.SeekMap.SeekPoints;
import androidx.media3.extractor.SeekPoint;

/**
* MP3 seeker that doesn't rely on metadata and seeks assuming the source has a constant bitrate.
Expand All @@ -28,11 +30,15 @@
private final int bitrate;
private final int frameSize;
private final boolean allowSeeksIfLengthUnknown;
private final long durationUs;
private final long dataEndPosition;

/**
* Constructs an instance.
*
* <p>The duration exposed from {@link #getDurationUs()} is computed from {@code inputLength} and
* the frame bitrate, or is {@link C#TIME_UNSET} if {@code inputLength} is unknown.
*
* @param inputLength The length of the stream in bytes, or {@link C#LENGTH_UNSET} if unknown.
* @param firstFramePosition The position of the first frame in the stream.
* @param mpegAudioHeader The MPEG audio header associated with the first frame.
Expand All @@ -53,23 +59,30 @@ public ConstantBitrateSeeker(
mpegAudioHeader.bitrate,
mpegAudioHeader.frameSize,
allowSeeksIfLengthUnknown,
/* isEstimated= */ true);
/* durationUs= */ C.TIME_UNSET);
}

/** See {@link ConstantBitrateSeekMap#ConstantBitrateSeekMap(long, long, int, int, boolean)}. */
/**
* See {@link ConstantBitrateSeekMap#ConstantBitrateSeekMap(long, long, int, int, boolean)}. Uses
* {@code durationUs} as the duration exposed from {@link #getDurationUs()}, or computes the
* duration from {@code inputLength} and {@code bitrate} if {@code durationUs} is {@link
* C#TIME_UNSET}.
*/
public ConstantBitrateSeeker(
long inputLength,
long firstFramePosition,
int bitrate,
int frameSize,
boolean allowSeeksIfLengthUnknown) {
boolean allowSeeksIfLengthUnknown,
long durationUs) {
this(
inputLength,
firstFramePosition,
bitrate,
frameSize,
allowSeeksIfLengthUnknown,
/* isEstimated= */ true);
/* isEstimated= */ true,
durationUs);
}

private ConstantBitrateSeeker(
Expand All @@ -78,7 +91,8 @@ private ConstantBitrateSeeker(
int bitrate,
int frameSize,
boolean allowSeeksIfLengthUnknown,
boolean isEstimated) {
boolean isEstimated,
long durationUs) {
super(
inputLength,
firstFramePosition,
Expand All @@ -88,8 +102,9 @@ private ConstantBitrateSeeker(
isEstimated);
this.firstFramePosition = firstFramePosition;
this.bitrate = bitrate;
this.frameSize = frameSize;
this.frameSize = frameSize == C.LENGTH_UNSET ? 1 : frameSize;
this.allowSeeksIfLengthUnknown = allowSeeksIfLengthUnknown;
this.durationUs = durationUs;
dataEndPosition = inputLength != C.LENGTH_UNSET ? inputLength : C.INDEX_UNSET;
}

Expand All @@ -98,6 +113,17 @@ public long getTimeUs(long position) {
return getTimeUsAtPosition(position);
}

/**
 * Returns the seek points for {@code timeUs}.
 *
 * <p>If an explicit duration is set and the end of the data is known, a request at or beyond that
 * duration resolves to a single seek point one frame before the end of the data. All other
 * requests are delegated to the superclass.
 */
@Override
public SeekPoints getSeekPoints(long timeUs) {
  boolean hasExplicitDuration = durationUs != C.TIME_UNSET;
  boolean hasKnownDataEnd = dataEndPosition != C.INDEX_UNSET;
  if (!hasExplicitDuration || !hasKnownDataEnd || timeUs < durationUs) {
    return super.getSeekPoints(timeUs);
  }
  // Start of the last frame, clamped so it never precedes the first frame.
  long lastFramePosition = Math.max(firstFramePosition, dataEndPosition - frameSize);
  // Time at the position one frame past the first frame, used as the per-frame duration.
  long singleFrameDurationUs = getTimeUsAtPosition(firstFramePosition + frameSize);
  // Step back one frame from the explicit duration, without going below zero.
  long lastFrameTimeUs = Math.max(0, durationUs - singleFrameDurationUs);
  return new SeekPoints(new SeekPoint(lastFrameTimeUs, lastFramePosition));
}

@Override
public long getDataStartPosition() {
return firstFramePosition;
Expand All @@ -108,6 +134,11 @@ public long getDataEndPosition() {
return dataEndPosition;
}

/**
 * Returns the explicitly provided duration if one was set, otherwise the duration computed by the
 * superclass.
 */
@Override
public long getDurationUs() {
  if (durationUs == C.TIME_UNSET) {
    // No explicit duration was supplied, so defer to the superclass.
    return super.getDurationUs();
  }
  return durationUs;
}

@Override
public int getAverageBitrate() {
return bitrate;
Expand All @@ -120,6 +151,7 @@ public ConstantBitrateSeeker copyWithNewDataEndPosition(long dataEndPosition) {
bitrate,
frameSize,
allowSeeksIfLengthUnknown,
/* isEstimated= */ false);
/* isEstimated= */ false,
durationUs);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package androidx.media3.extractor.mp3;

import static androidx.media3.extractor.mp3.Mp3Util.computeAverageBitrate;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.lang.annotation.ElementType.TYPE_USE;
import static java.lang.annotation.RetentionPolicy.SOURCE;
Expand Down Expand Up @@ -264,7 +265,7 @@ public int read(ExtractorInput input, PositionHolder seekPosition) throws IOExce
int readResult = readInternal(input);
if (readResult == RESULT_END_OF_INPUT && seeker instanceof IndexSeeker) {
// Duration is exact when index seeker is used.
long durationUs = computeTimeUs(samplesRead);
long durationUs = computeFinalIndexSeekerDurationUs(samplesRead);
if (seeker.getDurationUs() != durationUs) {
((IndexSeeker) seeker).setDurationUs(durationUs);
extractorOutput.seekMap(seeker);
Expand Down Expand Up @@ -389,6 +390,28 @@ private long computeTimeUs(long samplesRead) {
return basisTimeUs + samplesRead * C.MICROS_PER_SECOND / synchronizedHeader.sampleRate;
}

/**
 * Computes the duration to report for an {@link IndexSeeker} once the end of the input is reached.
 *
 * <p>The duration is derived from the encoded samples read. If gapless metadata is available, the
 * encoder delay and padding are trimmed, so that finalizing at EOF doesn't swap an initially
 * gapless Xing/Info duration for the longer encoded one.
 *
 * @param samplesRead The number of encoded samples read.
 * @return The final duration in microseconds, or {@link C#TIME_UNSET} if no samples remain after
 *     trimming.
 */
private long computeFinalIndexSeekerDurationUs(long samplesRead) {
  long encodedDurationUs = computeTimeUs(samplesRead);
  if (gaplessInfoHolder.hasGaplessInfo()) {
    long lastGaplessSampleIndex =
        Util.durationUsToSampleCount(encodedDurationUs, synchronizedHeader.sampleRate);
    lastGaplessSampleIndex -= gaplessInfoHolder.encoderDelay;
    lastGaplessSampleIndex -= gaplessInfoHolder.encoderPadding;
    // Turn the remaining sample count into the index of the final sample.
    lastGaplessSampleIndex -= 1;
    if (lastGaplessSampleIndex < 0) {
      return C.TIME_UNSET;
    }
    return Util.sampleCountToDurationUs(lastGaplessSampleIndex, synchronizedHeader.sampleRate);
  }
  return encodedDurationUs;
}

private boolean synchronize(ExtractorInput input, boolean sniffing) throws IOException {
int validFrameCount = 0;
int candidateSynchronizedHeaderData = 0;
Expand Down Expand Up @@ -519,12 +542,9 @@ private Seeker computeSeeker(ExtractorInput input) throws IOException {
resultSeeker.getDataEndPosition());
}

if (shouldFallbackToConstantBitrateSeeking(resultSeeker)
&& resultSeeker.getDurationUs() != C.TIME_UNSET
&& (resultSeeker.getDataEndPosition() != C.INDEX_UNSET
|| input.getLength() != C.LENGTH_UNSET)) {
// resultSeeker does not allow seeking, but does provide a duration and constant bitrate
// seeking has been requested, so we can do 'enhanced' CBR seeking using this duration info.
if (shouldFallbackToConstantBitrateSeeking(resultSeeker)) {
// If resultSeeker does not allow seeking but provides a duration and known end position, use
// this info to do 'enhanced' CBR seeking.
long dataStart =
resultSeeker.getDataStartPosition() != C.INDEX_UNSET
? resultSeeker.getDataStartPosition()
Expand All @@ -533,24 +553,26 @@ private Seeker computeSeeker(ExtractorInput input) throws IOException {
resultSeeker.getDataEndPosition() != C.INDEX_UNSET
? resultSeeker.getDataEndPosition()
: input.getLength();
long audioLength = inputLength - dataStart;
int bitrate =
Ints.saturatedCast(
Util.scaleLargeValue(
audioLength,
Byte.SIZE * C.MICROS_PER_SECOND,
resultSeeker.getDurationUs(),
RoundingMode.HALF_UP));
// inputLength will never be LENGTH_UNSET because of the outer if-condition, so we can pass
// (vacuously) false here for allowSeeksIfLengthUnknown.
resultSeeker =
new ConstantBitrateSeeker(
inputLength,
dataStart,
bitrate,
C.LENGTH_UNSET,
/* allowSeeksIfLengthUnknown= */ false);
} else if (shouldFallbackToConstantBitrateSeeking(resultSeeker)) {
long durationUs = resultSeeker.getDurationUs();
if (durationUs != C.TIME_UNSET && inputLength != C.LENGTH_UNSET) {
int averageBitrate = computeAverageBitrate(inputLength - dataStart, durationUs);
if (averageBitrate != C.RATE_UNSET_INT) {
// Only use enhanced CBR seeking when its bitrate can be derived safely. Otherwise, the
// regular CBR fallback below will use the next frame header bitrate.
// inputLength is known, so we can pass (vacuously) false for allowSeeksIfLengthUnknown.
resultSeeker =
new ConstantBitrateSeeker(
inputLength,
dataStart,
averageBitrate,
C.LENGTH_UNSET,
/* allowSeeksIfLengthUnknown= */ false,
durationUs);
}
}
}

if (shouldFallbackToConstantBitrateSeeking(resultSeeker)) {
// Either we found no seek or VBR info, so we must assume the file is CBR (even without the
// flag(s) being set), or an 'enable CBR seeking flag' is set and we found some seek info, but
// not enough to do 'enhanced' CBR seeking with. In either case, we fall back to CBR seeking
Expand Down Expand Up @@ -670,15 +692,13 @@ private Seeker getConstantBitrateSeeker(

// Derive the bitrate and frame size by averaging over the length of playable audio, to allow
// for 'mostly' CBR streams that might have a small number of frames with a different bitrate.
// We can assume infoFrame.frameCount is set, because otherwise computeDurationUs() would
// have returned C.TIME_UNSET above. See also https://github.com/androidx/media/issues/1376.
int averageBitrate =
Ints.checkedCast(
Util.scaleLargeValue(
audioLength,
C.BITS_PER_BYTE * C.MICROS_PER_SECOND,
durationUs,
RoundingMode.HALF_UP));
// See also https://github.com/androidx/media/issues/1376.
int averageBitrate = computeAverageBitrate(audioLength, durationUs);
if (averageBitrate == C.RATE_UNSET_INT) {
Comment thread
icbaker marked this conversation as resolved.
// Invalid Info sizes or durations should fall back to the next frame header bitrate rather
// than constructing a ConstantBitrateSeeker with an unset bitrate.
return null;
}
int frameSize =
Ints.checkedCast(LongMath.divide(audioLength, infoFrame.frameCount, RoundingMode.HALF_UP));
// Set the seeker frame size to the average frame size (even though some constant bitrate
Expand All @@ -689,7 +709,8 @@ private Seeker getConstantBitrateSeeker(
/* firstFramePosition= */ infoFramePosition + infoFrame.header.frameSize,
averageBitrate,
frameSize,
/* allowSeeksIfLengthUnknown= */ false);
/* allowSeeksIfLengthUnknown= */ false,
durationUs);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,18 +141,35 @@ public static XingFrame parse(MpegAudioUtil.Header mpegAudioHeader, ParsableByte

/**
* Compute the stream duration, in microseconds, represented by this frame. Returns {@link
* C#LENGTH_UNSET} if the frame doesn't contain enough information to compute a duration.
* C#TIME_UNSET} if the frame doesn't contain enough information to compute a duration. Encoder
* delay and padding are subtracted if present.
*/
// TODO: b/319235116 - Handle encoder delay and padding when calculating duration.
public long computeDurationUs() {
  long totalSamples = getSampleCount();
  if (totalSamples == C.LENGTH_UNSET) {
    // Without a sample count there is nothing to derive a duration from.
    return C.TIME_UNSET;
  }
  // Only trim when both the encoder delay and the encoder padding are set.
  boolean hasDelayAndPadding =
      encoderDelay != C.LENGTH_UNSET && encoderPadding != C.LENGTH_UNSET;
  long remainingSamples =
      hasDelayAndPadding ? totalSamples - (encoderDelay + encoderPadding) : totalSamples;
  // A non-positive count means no samples are left to derive a duration from.
  return remainingSamples > 0 ? computeDurationUs(remainingSamples) : C.TIME_UNSET;
}

private long getSampleCount() {
if (frameCount == C.LENGTH_UNSET || frameCount == 0) {
// If the frame count is missing/invalid, the header can't be used to determine the duration.
return C.TIME_UNSET;
return C.LENGTH_UNSET;
}
return frameCount * header.samplesPerFrame;
}

private long computeDurationUs(long sampleCount) {
// Audio requires both a start and end PCM sample, so subtract one from the sample count before
// calculating the duration.
return Util.sampleCountToDurationUs(
(frameCount * header.samplesPerFrame) - 1, header.sampleRate);
return Util.sampleCountToDurationUs(sampleCount - 1, header.sampleRate);
}

/** Provide the metadata derived from this Xing frame, such as ReplayGain data. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import androidx.media3.common.util.Util;
import androidx.media3.datasource.DefaultDataSource;
import androidx.media3.extractor.SeekMap;
import androidx.media3.extractor.SeekPoint;
import androidx.media3.test.utils.FakeExtractorOutput;
import androidx.media3.test.utils.FakeTrackOutput;
import androidx.media3.test.utils.TestUtil;
Expand Down Expand Up @@ -66,6 +67,23 @@ public void mp3ExtractorReads_returnSeekableCbrSeeker() throws IOException {
assertThat(seekMap.isSeekable()).isTrue();
}

@Test
public void getSeekPoints_atExplicitDuration_returnsFinalFrameSeekPoint() {
  // The explicit duration handed to the seeker, which getDurationUs() is expected to report.
  long explicitDurationUs = 900_000;
  ConstantBitrateSeeker seekerWithExplicitDuration =
      new ConstantBitrateSeeker(
          /* inputLength= */ 1_125,
          /* firstFramePosition= */ 125,
          /* bitrate= */ 8_000,
          /* frameSize= */ 1,
          /* allowSeeksIfLengthUnknown= */ false,
          /* durationUs= */ explicitDurationUs);

  // The explicit duration is reported as-is.
  assertThat(seekerWithExplicitDuration.getDurationUs()).isEqualTo(explicitDurationUs);
  // Position-to-time mapping: byte 1025 maps to 900 ms.
  assertThat(seekerWithExplicitDuration.getTimeUs(1_025)).isEqualTo(900_000);
  // A seek before the explicit duration resolves to byte 925.
  assertThat(seekerWithExplicitDuration.getSeekPoints(800_000).first.position).isEqualTo(925);
  // A seek at the explicit duration resolves to the final frame.
  assertThat(seekerWithExplicitDuration.getSeekPoints(explicitDurationUs).first)
      .isEqualTo(new SeekPoint(899_000, 1_124));
}

@Test
public void seeking_handlesSeekToZero() throws IOException {
String fileName = CONSTANT_FRAME_SIZE_TEST_FILE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
public class IndexSeekerTest {

private static final String TEST_FILE_XING_NO_TOC = "media/mp3/bear-vbr-xing-header-no-toc.mp3";
private static final int TEST_FILE_XING_NO_TOC_DURATION = 2_808_000;
private static final int TEST_FILE_XING_NO_TOC_GAPLESS_DURATION_US = 2_783_979;

private Mp3Extractor extractor;
private FakeExtractorOutput extractorOutput;
Expand All @@ -65,14 +65,14 @@ public void mp3ExtractorReads_returnsSeekableSeekMap() throws Exception {
}

@Test
public void mp3ExtractorReads_correctsInexactDuration() throws Exception {
public void mp3ExtractorReads_preservesGaplessDurationAfterEof() throws Exception {
FakeExtractorOutput extractorOutput =
TestUtil.extractAllSamplesFromFile(
extractor, ApplicationProvider.getApplicationContext(), TEST_FILE_XING_NO_TOC);

SeekMap seekMap = extractorOutput.seekMap;

assertThat(seekMap.getDurationUs()).isEqualTo(TEST_FILE_XING_NO_TOC_DURATION);
assertThat(seekMap.getDurationUs()).isEqualTo(TEST_FILE_XING_NO_TOC_GAPLESS_DURATION_US);
}

@Test
Expand All @@ -86,6 +86,17 @@ public void constructor_calculatesCorrectAverageBitrate() {
assertThat(seeker.getAverageBitrate()).isEqualTo(8_000);
}

@Test
public void constructor_returnsUnsetAverageBitrateWhenAverageCannotBeCalculated() {
  // Both the duration and the data end position are unset.
  IndexSeeker seekerWithoutDurationOrEnd =
      new IndexSeeker(
          /* durationUs= */ C.TIME_UNSET,
          /* dataStartPosition= */ 100,
          /* dataEndPosition= */ C.INDEX_UNSET);

  assertThat(seekerWithoutDurationOrEnd.getAverageBitrate()).isEqualTo(C.RATE_UNSET_INT);
}

@Test
public void seeking_handlesSeekToZero() throws Exception {
String fileName = TEST_FILE_XING_NO_TOC;
Expand All @@ -111,7 +122,7 @@ public void seeking_handlesSeekToEof() throws Exception {
SeekMap seekMap = TestUtil.extractSeekMap(extractor, extractorOutput, dataSource, fileUri);
FakeTrackOutput trackOutput = extractorOutput.trackOutputs.get(0);

long targetSeekTimeUs = TEST_FILE_XING_NO_TOC_DURATION;
long targetSeekTimeUs = TEST_FILE_XING_NO_TOC_GAPLESS_DURATION_US;
int extractedFrameIndex =
TestUtil.seekToTimeUs(
extractor, seekMap, targetSeekTimeUs, dataSource, trackOutput, fileUri);
Expand Down
Loading