Skip to content

Commit

Permalink
h264: improve DTS extractor
Browse files Browse the repository at this point in the history
This improves the DTS extractor in order to work with streams with no
timing infos or incorrect timing infos
  • Loading branch information
aler9 committed Jan 6, 2023
1 parent f7e8c5b commit a759ba9
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 389 deletions.
189 changes: 47 additions & 142 deletions pkg/codecs/h264/dtsextractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,9 @@ func getPictureOrderCount(buf []byte, sps *SPS) (uint32, error) {
return 0, fmt.Errorf("frame_mbs_only_flag = 0 is not supported")
}

var picOrderCntLsb uint64
switch {
case sps.PicOrderCntType == 0:
picOrderCntLsb, err = bits.ReadBits(buf, &pos, int(sps.Log2MaxPicOrderCntLsbMinus4+4))
if err != nil {
return 0, err
}

default:
return 0, fmt.Errorf("pic_order_cnt_type = 1 is not supported")
picOrderCntLsb, err := bits.ReadBits(buf, &pos, int(sps.Log2MaxPicOrderCntLsbMinus4+4))
if err != nil {
return 0, err
}

return uint32(picOrderCntLsb), nil
Expand Down Expand Up @@ -78,141 +71,21 @@ func getPictureOrderCountDiff(poc1 uint32, poc2 uint32, sps *SPS) int32 {
return diff
}

type seiTimingInfo struct {
cpbRemovalDelay uint32
dpbOutputDelay uint32
}

func parseSEITimingInfo(buf []byte, sps *SPS) (*seiTimingInfo, bool) {
buf = EmulationPreventionRemove(buf)
pos := 1

for {
if pos >= (len(buf) - 1) {
return nil, false
}

payloadType := 0
for {
byt := buf[pos]
pos++
payloadType += int(byt)
if byt != 0xFF {
break
}
}

payloadSize := 0
for {
byt := buf[pos]
pos++
payloadSize += int(byt)
if byt != 0xFF {
break
}
}

if payloadType == 1 { // timing info
buf2 := buf[pos:]
pos2 := 0

ret := &seiTimingInfo{}

tmp, err := bits.ReadBits(buf2, &pos2, int(sps.VUI.NalHRD.CpbRemovalDelayLengthMinus1+1))
if err != nil {
return nil, false
}
ret.cpbRemovalDelay = uint32(tmp)

tmp, err = bits.ReadBits(buf2, &pos2, int(sps.VUI.NalHRD.DpbOutputDelayLengthMinus1+1))
if err != nil {
return nil, false
}
ret.dpbOutputDelay = uint32(tmp)

return ret, true
}

pos += payloadSize
}
}

func findSEITimingInfo(au [][]byte, sps *SPS) (*seiTimingInfo, bool) {
for _, nalu := range au {
typ := NALUType(nalu[0] & 0x1F)
if typ == NALUTypeSEI {
ret, ok := parseSEITimingInfo(nalu, sps)
if ok {
return ret, true
}
}
}
return nil, false
}

// DTSExtractor allows to extract DTS from PTS.
type DTSExtractor struct {
spsp *SPS
prevDTSFilled bool
prevDTS time.Duration
expectedPOC uint32
spsp *SPS
prevDTSFilled bool
prevDTS time.Duration
expectedPOC uint32
reorderedFrames int
pauseDTS int
}

// NewDTSExtractor allocates a DTSExtractor.
func NewDTSExtractor() *DTSExtractor {
return &DTSExtractor{}
}

// returns the difference between PTS POC (picture order count) and DTS POC.
func (d *DTSExtractor) findPOCDiff(idrPresent bool, au [][]byte) (int, error) {
switch {
// POC difference is computed by using PTS POC, timing infos and max_num_reorder_frames
case d.spsp.PicOrderCntType != 2 &&
d.spsp.VUI != nil && d.spsp.VUI.TimingInfo != nil && d.spsp.VUI.BitstreamRestriction != nil:
if idrPresent {
d.expectedPOC = 0
return int(d.spsp.VUI.BitstreamRestriction.MaxNumReorderFrames * 2), nil
}

// compute expectedPOC immediately in order to store it even in case of errors
d.expectedPOC += 2
d.expectedPOC &= ((1 << (d.spsp.Log2MaxPicOrderCntLsbMinus4 + 4)) - 1)

poc, err := findPictureOrderCount(au, d.spsp)
if err != nil {
return 0, err
}

pocDiff := int(getPictureOrderCountDiff(poc, d.expectedPOC, d.spsp)) +
int(d.spsp.VUI.BitstreamRestriction.MaxNumReorderFrames*2)

return pocDiff, nil

// POC difference is computed from SEI
case d.spsp.VUI != nil && d.spsp.VUI.TimingInfo != nil && d.spsp.VUI.NalHRD != nil:
ti, ok := findSEITimingInfo(au, d.spsp)
if !ok {
// some streams declare that they use SEI pic timings, but they don't.
// assume PTS = DTS.
return 0, nil
}

// workaround for nvenc
// nvenc puts a wrong dpbOutputDelay into timing infos of non-starting IDR frames
// https://forums.developer.nvidia.com/t/nvcodec-h-264-encoder-sei-pic-timing-dpb-output-delay/156050
// https://forums.developer.nvidia.com/t/h264-pic-timing-sei-message/71188
if idrPresent && ti.cpbRemovalDelay > 0 {
ti.dpbOutputDelay = 2
}

return int(ti.dpbOutputDelay), nil

// assume PTS = DTS
default:
return 0, nil
}
}

func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Duration, error) {
idrPresent := false

Expand All @@ -236,20 +109,52 @@ func (d *DTSExtractor) extractInner(au [][]byte, pts time.Duration) (time.Durati
return 0, fmt.Errorf("SPS not received yet")
}

if d.spsp.VUI == nil || d.spsp.VUI.TimingInfo == nil {
if d.spsp.PicOrderCntType == 2 {
return pts, nil
}

if d.spsp.PicOrderCntType == 1 {
return 0, fmt.Errorf("pic_order_cnt_type = 1 is not supported yet")
}

if idrPresent {
d.expectedPOC = 0
d.reorderedFrames = 0
d.pauseDTS = 0
return pts, nil
}

pocDiff, err := d.findPOCDiff(idrPresent, au)
d.expectedPOC += 2
d.expectedPOC &= ((1 << (d.spsp.Log2MaxPicOrderCntLsbMinus4 + 4)) - 1)

if d.pauseDTS > 0 {
d.pauseDTS--
return d.prevDTS + 1*time.Millisecond, nil
}

poc, err := findPictureOrderCount(au, d.spsp)
if err != nil {
return 0, err
}

timeDiff := time.Duration(pocDiff) * time.Second *
time.Duration(d.spsp.VUI.TimingInfo.NumUnitsInTick) / time.Duration(d.spsp.VUI.TimingInfo.TimeScale)
dts := pts - timeDiff
pocDiff := int(getPictureOrderCountDiff(poc, d.expectedPOC, d.spsp)) + d.reorderedFrames*2

if pocDiff < 0 {
return 0, fmt.Errorf("invalid POC")
}

if pocDiff == 0 {
return pts, nil
}

reorderedFrames := (pocDiff - d.reorderedFrames*2) / 2
if reorderedFrames > d.reorderedFrames {
d.pauseDTS = (reorderedFrames - d.reorderedFrames - 1)
d.reorderedFrames = reorderedFrames
return d.prevDTS + 1*time.Millisecond, nil
}

return dts, nil
return d.prevDTS + ((pts - d.prevDTS) * 2 / time.Duration(pocDiff+2)), nil
}

// Extract extracts the DTS of a access unit.
Expand Down

0 comments on commit a759ba9

Please sign in to comment.