use a new silent frame insert algorithm for audio remux (#354)
* use a new silent frame insert algorithm for audio remux.
The old algorithm cannot handle some common situations, for example:
1. The audio frame dts does not increase by a constant duration: sometimes it jumps by a large amount (more than 1.5 * refSampleDuration), sometimes by a small one, while the average duration stays close to refSampleDuration. The old algorithm still inserted silent frames in this case, which drove video and audio out of sync.
2. For a live network stream, network jitter or frame loss can make the encoder emit audio frames with incorrect dts. When no single gap between adjacent frames exceeds 1.5 * refSampleDuration, the accumulated gap keeps growing, yet the old algorithm never inserts silent frames for this case (see the sketch below).

* initialize _audioNextRefDts when seeking

* fix: lint error

* test

* fix a curRefDts update mistake

* reuse some original variables that have the same meaning

* fix some variable mistakes

* bug fix: incorrect byte length

Co-authored-by: xiaosong <kunkkaco@gmail.com>
Co-authored-by: wangjiankai <wangjiankai@cmhi.chinamobile.com>
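
The two failure cases above are what the new algorithm addresses for AAC: instead of comparing each gap against 1.5 * refSampleDuration, it keeps an expected dts on a reference clock (the diff's _audioNextDts / curRefDts) and measures how far each incoming frame has drifted from it. Roughly: a frame lagging the clock by three or more frame durations is dropped, a frame running ahead by three or more frame durations triggers silent-frame insertion, and anything in between is snapped onto the reference clock so small per-frame errors no longer accumulate (mp3 keeps the old dts path). The sketch below is a simplified illustration of that decision logic only, not the actual mp4-remuxer.js code; the function name, standalone form, and return shape are illustrative.

```js
// Sketch of the drift-based decision used by the new algorithm (simplified).
// Assumptions: refSampleDuration is the nominal AAC frame duration in ms,
// audioNextDts is the expected dts of this frame on the reference clock
// (undefined before the first frame). Names are illustrative only.
const MAX_AUDIO_FRAMES_DRIFT = 3;

function classifyAudioFrame(originalDts, audioNextDts, refSampleDuration) {
    // Expected position of this frame; the first frame defines the clock
    let curRefDts = (audioNextDts !== undefined) ? audioNextDts : originalDts;
    let dtsCorrection = originalDts - curRefDts;

    if (dtsCorrection <= -MAX_AUDIO_FRAMES_DRIFT * refSampleDuration) {
        // Overlaps the reference clock by 3+ frames: drop the sample,
        // leave the reference clock where it is
        return { action: 'drop', nextDts: curRefDts };
    }

    if (dtsCorrection >= MAX_AUDIO_FRAMES_DRIFT * refSampleDuration) {
        // Gap of 3+ frames: keep this sample on the reference clock and
        // schedule silent frames after it until the clock catches up
        let silentFrameCount = Math.floor(dtsCorrection / refSampleDuration);
        return {
            action: 'fill',
            silentFrameCount: silentFrameCount,
            dts: Math.floor(curRefDts),
            nextDts: curRefDts + (silentFrameCount + 1) * refSampleDuration
        };
    }

    // Small drift (under 3 frames either way): snap the frame onto the
    // reference clock so jitter never accumulates (case 2 above)
    return {
        action: 'keep',
        dts: Math.floor(curRefDts),
        nextDts: curRefDts + refSampleDuration
    };
}
```

For example, at 44.1 kHz where refSampleDuration ≈ 23.2 ms, a frame that has drifted 70 ms ahead of the clock would get Math.floor(70 / 23.2) = 3 silent frames generated after it. The key design change is that drift is measured against an accumulated reference timeline rather than against the gap between adjacent frames, which is why case 2 (many small gaps that add up) is now caught, and why the drop branch prevents the opposite problem of frames piling up behind the clock.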
3 people committed Apr 28, 2021
1 parent 4485c09 commit 26d00d9
Showing 2 changed files with 122 additions and 97 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -108,3 +108,4 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
```

218 changes: 121 additions & 97 deletions src/remux/mp4-remuxer.js
@@ -20,8 +20,8 @@ import Log from '../utils/logger.js';
import MP4 from './mp4-generator.js';
import AAC from './aac-silent.js';
import Browser from '../utils/browser.js';
import {SampleInfo, MediaSegmentInfo, MediaSegmentInfoList} from '../core/media-segment-info.js';
import {IllegalStateException} from '../utils/exception.js';
import { SampleInfo, MediaSegmentInfo, MediaSegmentInfoList } from '../core/media-segment-info.js';
import { IllegalStateException } from '../utils/exception.js';


// Fragmented mp4 remuxer
@@ -54,8 +54,8 @@ class MP4Remuxer {
// Workaround for chrome < 50: Always force first sample as a Random Access Point in media segment
// see https://bugs.chromium.org/p/chromium/issues/detail?id=229412
this._forceFirstIDR = (Browser.chrome &&
(Browser.version.major < 50 ||
(Browser.version.major === 50 && Browser.version.build < 2661))) ? true : false;
(Browser.version.major < 50 ||
(Browser.version.major === 50 && Browser.version.build < 2661))) ? true : false;

// Workaround for IE11/Edge: Fill silent aac frame after keyframe-seeking
// Make audio beginDts equals with video beginDts, in order to fix seek freeze
@@ -331,7 +331,7 @@ class MP4Remuxer {
let dts = videoSegment.beginDts;
let silentFrameDuration = firstSampleDts - videoSegment.beginDts;
Log.v(this.TAG, `InsertPrefixSilentAudio: dts: ${dts}, duration: ${silentFrameDuration}`);
samples.unshift({unit: silentUnit, dts: dts, pts: dts});
samples.unshift({ unit: silentUnit, dts: dts, pts: dts });
mdatBytes += silentUnit.byteLength;
} // silentUnit == null: Cannot generate, skip
} else {
@@ -346,92 +346,109 @@ class MP4Remuxer {
let sample = samples[i];
let unit = sample.unit;
let originalDts = sample.dts - this._dtsBase;
let dts = originalDts - dtsCorrection;
let dts = originalDts;
let needFillSilentFrames = false;
let silentFrames = null;
let sampleDuration = 0;

if (firstDts === -1) {
firstDts = dts;
if (originalDts < -0.001) {
continue; // skip samples with an invalid dts
}

let sampleDuration = 0;
if (this._audioMeta.codec !== 'mp3') {
// for the AAC codec, we need to keep dts increasing based on refSampleDuration
let curRefDts = originalDts;
const maxAudioFramesDrift = 3;
if (this._audioNextDts) {
curRefDts = this._audioNextDts;
}

if (i !== samples.length - 1) {
let nextDts = samples[i + 1].dts - this._dtsBase - dtsCorrection;
sampleDuration = nextDts - dts;
} else { // the last sample
if (lastSample != null) { // use stashed sample's dts to calculate sample duration
let nextDts = lastSample.dts - this._dtsBase - dtsCorrection;
sampleDuration = nextDts - dts;
} else if (mp4Samples.length >= 1) { // use second last sample duration
sampleDuration = mp4Samples[mp4Samples.length - 1].duration;
} else { // the only one sample, use reference sample duration
sampleDuration = Math.floor(refSampleDuration);
dtsCorrection = originalDts - curRefDts;
if (dtsCorrection <= -maxAudioFramesDrift * refSampleDuration) {
// If we're overlapping by more than maxAudioFramesDrift frames, drop this sample
Log.w(this.TAG, `Dropping 1 audio frame (originalDts: ${originalDts} ms, curRefDts: ${curRefDts} ms) due to dtsCorrection: ${dtsCorrection} ms overlap.`);
continue;
}
}
else if (dtsCorrection >= maxAudioFramesDrift * refSampleDuration && this._fillAudioTimestampGap && !Browser.safari) {
// Silent frame generation, if large timestamp gap detected && config.fixAudioTimestampGap
needFillSilentFrames = true;
// We need to insert silent frames to fill timestamp gap
let frameCount = Math.floor(dtsCorrection / refSampleDuration);
Log.w(this.TAG, 'Large audio timestamp gap detected, may cause AV sync to drift. ' +
'Silent frames will be generated to avoid unsync.\n' +
`originalDts: ${originalDts} ms, curRefDts: ${curRefDts} ms, ` +
`dtsCorrection: ${Math.round(dtsCorrection)} ms, generate: ${frameCount} frames`);


dts = Math.floor(curRefDts);
sampleDuration = Math.floor(curRefDts + refSampleDuration) - dts;

let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
if (silentUnit == null) {
Log.w(this.TAG, 'Unable to generate silent frame for ' +
`${this._audioMeta.originalCodec} with ${this._audioMeta.channelCount} channels, repeat last frame`);
// Repeat last frame
silentUnit = unit;
}
silentFrames = [];

for (let j = 0; j < frameCount; j++) {
curRefDts = curRefDts + refSampleDuration;
let intDts = Math.floor(curRefDts); // change to integer
let intDuration = Math.floor(curRefDts + refSampleDuration) - intDts;
let frame = {
dts: intDts,
pts: intDts,
cts: 0,
unit: silentUnit,
size: silentUnit.byteLength,
duration: intDuration, // wait for next sample
originalDts: originalDts,
flags: {
isLeading: 0,
dependsOn: 1,
isDependedOn: 0,
hasRedundancy: 0
}
};
silentFrames.push(frame);
mdatBytes += frame.size;

let needFillSilentFrames = false;
let silentFrames = null;
}

// Silent frame generation, if large timestamp gap detected && config.fixAudioTimestampGap
if (sampleDuration > refSampleDuration * 1.5 && this._audioMeta.codec !== 'mp3' && this._fillAudioTimestampGap && !Browser.safari) {
// We need to insert silent frames to fill timestamp gap
needFillSilentFrames = true;
let delta = Math.abs(sampleDuration - refSampleDuration);
let frameCount = Math.ceil(delta / refSampleDuration);
let currentDts = dts + refSampleDuration; // Notice: in float
this._audioNextDts = curRefDts + refSampleDuration;

Log.w(this.TAG, 'Large audio timestamp gap detected, may cause AV sync to drift. ' +
'Silent frames will be generated to avoid unsync.\n' +
`dts: ${dts + sampleDuration} ms, expected: ${dts + Math.round(refSampleDuration)} ms, ` +
`delta: ${Math.round(delta)} ms, generate: ${frameCount} frames`);
} else {

let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
if (silentUnit == null) {
Log.w(this.TAG, 'Unable to generate silent frame for ' +
`${this._audioMeta.originalCodec} with ${this._audioMeta.channelCount} channels, repeat last frame`);
// Repeat last frame
silentUnit = unit;
}
silentFrames = [];

for (let j = 0; j < frameCount; j++) {
let intDts = Math.round(currentDts); // round to integer
if (silentFrames.length > 0) {
// Set previous frame sample duration
let previousFrame = silentFrames[silentFrames.length - 1];
previousFrame.duration = intDts - previousFrame.dts;
}
let frame = {
dts: intDts,
pts: intDts,
cts: 0,
unit: silentUnit,
size: silentUnit.byteLength,
duration: 0, // wait for next sample
originalDts: originalDts,
flags: {
isLeading: 0,
dependsOn: 1,
isDependedOn: 0,
hasRedundancy: 0
}
};
silentFrames.push(frame);
mdatBytes += frame.size;
currentDts += refSampleDuration;
}
dts = Math.floor(curRefDts);
sampleDuration = Math.floor(curRefDts + refSampleDuration) - dts;
this._audioNextDts = curRefDts + refSampleDuration;

// last frame: align end time to next frame dts
let lastFrame = silentFrames[silentFrames.length - 1];
lastFrame.duration = dts + sampleDuration - lastFrame.dts;
}
} else {
// keep the original dts calculation algorithm for mp3
dts = originalDts - dtsCorrection;

// silentFrames.forEach((frame) => {
// Log.w(this.TAG, `SilentAudio: dts: ${frame.dts}, duration: ${frame.duration}`);
// });

// Set correct sample duration for current frame
sampleDuration = Math.round(refSampleDuration);
if (i !== samples.length - 1) {
let nextDts = samples[i + 1].dts - this._dtsBase - dtsCorrection;
sampleDuration = nextDts - dts;
} else { // the last sample
if (lastSample != null) { // use stashed sample's dts to calculate sample duration
let nextDts = lastSample.dts - this._dtsBase - dtsCorrection;
sampleDuration = nextDts - dts;
} else if (mp4Samples.length >= 1) { // use second last sample duration
sampleDuration = mp4Samples[mp4Samples.length - 1].duration;
} else { // the only one sample, use reference sample duration
sampleDuration = Math.floor(refSampleDuration);
}
}
this._audioNextDts = dts + sampleDuration;
}

if (firstDts === -1) {
firstDts = dts;
}
mp4Samples.push({
dts: dts,
pts: dts,
@@ -454,6 +471,13 @@ class MP4Remuxer {
}
}

if (mp4Samples.length === 0) {
// no samples need to be remuxed
track.samples = [];
track.length = 0;
return;
}

// allocate mdatbox
if (mpegRawTrack) {
// allocate for raw mpeg buffer
@@ -464,7 +488,7 @@ class MP4Remuxer {
// size field
mdatbox[0] = (mdatBytes >>> 24) & 0xFF;
mdatbox[1] = (mdatBytes >>> 16) & 0xFF;
mdatbox[2] = (mdatBytes >>> 8) & 0xFF;
mdatbox[2] = (mdatBytes >>> 8) & 0xFF;
mdatbox[3] = (mdatBytes) & 0xFF;
// type field (fourCC)
mdatbox.set(MP4.types.mdat, 4);
@@ -479,7 +503,7 @@ class MP4Remuxer {

let latest = mp4Samples[mp4Samples.length - 1];
lastDts = latest.dts + latest.duration;
this._audioNextDts = lastDts;
//this._audioNextDts = lastDts;

// fill media segment info & add to info list
let info = new MediaSegmentInfo();
@@ -490,15 +514,15 @@ class MP4Remuxer {
info.originalBeginDts = mp4Samples[0].originalDts;
info.originalEndDts = latest.originalDts + latest.duration;
info.firstSample = new SampleInfo(mp4Samples[0].dts,
mp4Samples[0].pts,
mp4Samples[0].duration,
mp4Samples[0].originalDts,
false);
mp4Samples[0].pts,
mp4Samples[0].duration,
mp4Samples[0].originalDts,
false);
info.lastSample = new SampleInfo(latest.dts,
latest.pts,
latest.duration,
latest.originalDts,
false);
latest.pts,
latest.duration,
latest.originalDts,
false);
if (!this._isLive) {
this._audioSegmentInfoList.append(info);
}
@@ -667,7 +691,7 @@ class MP4Remuxer {
mdatbox = new Uint8Array(mdatBytes);
mdatbox[0] = (mdatBytes >>> 24) & 0xFF;
mdatbox[1] = (mdatBytes >>> 16) & 0xFF;
mdatbox[2] = (mdatBytes >>> 8) & 0xFF;
mdatbox[2] = (mdatBytes >>> 8) & 0xFF;
mdatbox[3] = (mdatBytes) & 0xFF;
mdatbox.set(MP4.types.mdat, 4);

@@ -695,15 +719,15 @@ class MP4Remuxer {
info.originalBeginDts = mp4Samples[0].originalDts;
info.originalEndDts = latest.originalDts + latest.duration;
info.firstSample = new SampleInfo(mp4Samples[0].dts,
mp4Samples[0].pts,
mp4Samples[0].duration,
mp4Samples[0].originalDts,
mp4Samples[0].isKeyframe);
mp4Samples[0].pts,
mp4Samples[0].duration,
mp4Samples[0].originalDts,
mp4Samples[0].isKeyframe);
info.lastSample = new SampleInfo(latest.dts,
latest.pts,
latest.duration,
latest.originalDts,
latest.isKeyframe);
latest.pts,
latest.duration,
latest.originalDts,
latest.isKeyframe);
if (!this._isLive) {
this._videoSegmentInfoList.append(info);
}
@@ -740,4 +764,4 @@ class MP4Remuxer {

}

export default MP4Remuxer;
export default MP4Remuxer;
