diff --git a/subtitles.go b/subtitles.go
index 8ffd2e7..5aa3490 100644
--- a/subtitles.go
+++ b/subtitles.go
@@ -32,23 +32,22 @@ var Now = func() time.Time {
 
 // Options represents open or write options
 type Options struct {
-	Page int
-	PID  int
-	Src  string
+	Filename string
+	Teletext TeletextOptions
 }
 
-// Open opens a subtitle file based on options
+// Open opens a subtitle reader based on options
 func Open(o Options) (s *Subtitles, err error) {
 	// Open the file
 	var f *os.File
-	if f, err = os.Open(o.Src); err != nil {
-		err = errors.Wrapf(err, "astisub: opening %s failed", o.Src)
+	if f, err = os.Open(o.Filename); err != nil {
+		err = errors.Wrapf(err, "astisub: opening %s failed", o.Filename)
 		return
 	}
 	defer f.Close()
 
 	// Parse the content
-	switch filepath.Ext(o.Src) {
+	switch filepath.Ext(o.Filename) {
 	case ".srt":
 		s, err = ReadFromSRT(f)
 	case ".ssa", ".ass":
@@ -56,7 +55,7 @@ func Open(o Options) (s *Subtitles, err error) {
 	case ".stl":
 		s, err = ReadFromSTL(f)
 	case ".ts":
-		s, err = ReadFromTeletext(f, o.PID, o.Page)
+		s, err = ReadFromTeletext(f, o.Teletext)
 	case ".ttml":
 		s, err = ReadFromTTML(f)
 	case ".vtt":
@@ -68,8 +67,8 @@ func Open(o Options) (s *Subtitles, err error) {
 }
 
 // OpenFile opens a file regardless of other options
-func OpenFile(src string) (*Subtitles, error) {
-	return Open(Options{Src: src})
+func OpenFile(filename string) (*Subtitles, error) {
+	return Open(Options{Filename: filename})
 }
 
 // Subtitles represents an ordered list of items with formatting
diff --git a/teletext.go b/teletext.go
index 704eb40..d351401 100644
--- a/teletext.go
+++ b/teletext.go
@@ -1,8 +1,344 @@
 package astisub
 
-import "io"
+import (
+	"context"
+	"fmt"
+	"io"
+
+	"github.com/asticode/go-astits"
+	"github.com/pkg/errors"
+)
+
+// Errors
+var (
+	ErrNoValidTeletextPID = errors.New("astisub: no valid teletext PID")
+)
+
+// Teletext PES data types
+const (
+	teletextPESDataTypeEBU     = "EBU"
+	teletextPESDataTypeUnknown = "unknown"
+)
+
+// Teletext PES data unit types
+const (
+	teletextPESDataUnitTypeEBUNonSubtitleData = 0x2
+	teletextPESDataUnitTypeEBUSubtitleData    = 0x3
+	teletextPESDataUnitTypeStuffing           = 0xff
+)
+
+// TeletextOptions represents teletext options.
+// If PID is not > 0, the first valid teletext PID found in a PMT is used.
+type TeletextOptions struct {
+	Page int
+	PID  int
+}
 
 // ReadFromTeletext parses a teletext content
-func ReadFromTeletext(r io.ReadSeeker, pid, page int) (o *Subtitles, err error) {
+// http://www.etsi.org/deliver/etsi_en/300400_300499/300472/01.03.01_60/en_300472v010301p.pdf
+// http://www.etsi.org/deliver/etsi_i_ets/300700_300799/300706/01_60/ets_300706e01p.pdf
+//
+// It reads r through an astits demuxer and parses the PES data carried on the teletext PID.
+// ErrNoValidTeletextPID is returned as is when no teletext PID can be determined.
+// TODO Convert the parsed PES data into subtitles: for now it is discarded and s is returned nil
+func ReadFromTeletext(r io.Reader, o TeletextOptions) (s *Subtitles, err error) {
+	// Init demuxer
+	var dmx = astits.New(context.Background(), r)
+
+	// Get the teletext PID
+	var pid uint16
+	if pid, err = teletextPID(dmx, o); err != nil {
+		if err != ErrNoValidTeletextPID {
+			err = errors.Wrap(err, "astisub: getting teletext PID failed")
+		}
+		return
+	}
+
+	// Loop in data
+	var d *astits.Data
+	for {
+		// Fetch next data
+		if d, err = dmx.NextData(); err != nil {
+			if err == astits.ErrNoMorePackets {
+				err = nil
+				break
+			}
+			err = errors.Wrap(err, "astisub: fetching next data failed")
+			return
+		}
+
+		// This data is not of interest to us
+		if d.PID != pid || d.PES == nil || d.PES.Header.StreamID != astits.StreamIDPrivateStream1 {
+			continue
+		}
+
+		// Parse PES data
+		var td *teletextPESData
+		if td, err = parseTeletextPESData(d.PES.Data); err != nil {
+			err = errors.Wrap(err, "astisub: parsing teletext PES data failed")
+			return
+		}
+		_ = td
+	}
+	return
+}
+
+// teletextPID returns the teletext PID.
+// If the PID teletext option is not indicated, it will walk through the ts data until it reaches a PMT packet to
+// detect the first valid teletext PID
+func teletextPID(dmx *astits.Demuxer, o TeletextOptions) (pid uint16, err error) {
+	// PID is in the options
+	if o.PID > 0 {
+		pid = uint16(o.PID)
+		return
+	}
+
+	// Loop in data
+	var d *astits.Data
+	for {
+		// Fetch next data
+		if d, err = dmx.NextData(); err != nil {
+			if err == astits.ErrNoMorePackets {
+				err = ErrNoValidTeletextPID
+				return
+			}
+			err = errors.Wrap(err, "astisub: fetching next data failed")
+			return
+		}
+
+		// PMT data
+		if d.PMT != nil {
+			// Retrieve valid teletext PIDs
+			var pids []uint16
+			for _, s := range d.PMT.ElementaryStreams {
+				for _, dsc := range s.ElementaryStreamDescriptors {
+					if dsc.Tag == astits.DescriptorTagTeletext || dsc.Tag == astits.DescriptorTagVBITeletext {
+						pids = append(pids, s.ElementaryPID)
+					}
+				}
+			}
+
+			// No valid teletext PIDs
+			if len(pids) == 0 {
+				err = ErrNoValidTeletextPID
+				return
+			}
+
+			// Set pid
+			pid = pids[0]
+
+			// Rewind
+			if _, err = dmx.Rewind(); err != nil {
+				err = errors.Wrap(err, "astisub: rewinding failed")
+				return
+			}
+			return
+		}
+	}
+}
+
+// teletextPESData represents a teletext PES data
+type teletextPESData struct {
+	dataIdentifier uint8
+	units          []*teletextPESDataUnit
+}
+
+// teletextPESDataUnit represents a teletext PES data unit
+type teletextPESDataUnit struct {
+	data            []byte
+	designationCode uint8
+	fieldParity     bool
+	framingCode     uint8
+	id              uint8
+	length          uint8
+	lineOffset      uint8
+	magazineNumber  uint8
+	packetNumber    uint8
+}
+
+// LineOffsetNumber returns the teletext data unit line offset number.
+// It is 0 when the line offset is outside the [0x7, 0x16] range. Otherwise it is the line offset, increased
+// by 313 when the field parity is not set.
+func (u teletextPESDataUnit) LineOffsetNumber() int {
+	if u.lineOffset < 0x7 || u.lineOffset > 0x16 {
+		return 0
+	}
+	var offset int
+	if !u.fieldParity {
+		offset = 313
+	}
+	return int(u.lineOffset) + offset
+}
+
+// parseTeletextPESData parses a teletext PES data: a data identifier byte followed by data units.
+// It returns an error if i is empty as the data identifier is then missing. Data units that are truncated,
+// malformed or not EBU subtitle data are skipped and never reported as errors.
+func parseTeletextPESData(i []byte) (d *teletextPESData, err error) {
+	// The data identifier is mandatory
+	if len(i) == 0 {
+		err = errors.New("astisub: teletext PES data is empty")
+		return
+	}
+
+	// Init
+	d = &teletextPESData{}
+	var offset int
+
+	// Data identifier
+	d.dataIdentifier = uint8(i[offset])
+	offset += 1
+
+	// Loop until end of data
+	for offset < len(i) {
+		// Parse data unit
+		parseTeletextPESDataUnit(i, &offset, d)
+	}
+	return
+}
+
+// parseTeletextPESDataUnit parses the teletext PES data unit starting at *offset and appends it to d.units
+// if it is a valid EBU subtitle data unit. Data units that have another id, are truncated, have an unexpected
+// framing code or can't be Hamming 8/4 decoded are skipped.
+// When it returns, *offset is the end of the data unit, or len(i) if the data unit is truncated, so that the
+// caller always moves forward.
+func parseTeletextPESDataUnit(i []byte, offset *int, d *teletextPESData) {
+	// The data unit header is truncated
+	if len(i)-*offset < 2 {
+		*offset = len(i)
+		return
+	}
+
+	// Init
+	var u = &teletextPESDataUnit{}
+
+	// ID
+	u.id = uint8(i[*offset])
+	*offset += 1
+
+	// Length
+	u.length = uint8(i[*offset])
+	*offset += 1
+
+	// The data unit is truncated
+	var offsetEnd = *offset + int(u.length)
+	if offsetEnd > len(i) {
+		*offset = len(i)
+		return
+	}
+
+	// Make sure we seek at the end of the data unit once everything is done
+	defer func(offset *int) {
+		*offset = offsetEnd
+	}(offset)
+
+	// Unprocessed data unit ids
+	// TODO Should we process other ids?
+	if u.id != teletextPESDataUnitTypeEBUSubtitleData {
+		return
+	}
+
+	// The data unit is too short to hold the line offset, the framing code and the magazine and packet numbers
+	if offsetEnd-*offset < 4 {
+		return
+	}
+
+	// Field parity
+	u.fieldParity = i[*offset]&0x20 > 0
+
+	// Line offset
+	u.lineOffset = uint8(i[*offset] & 0x1f)
+	*offset += 1
+
+	// Framing code
+	u.framingCode = uint8(i[*offset])
+	*offset += 1
+
+	// Framing code must be 11100100
+	if u.framingCode != 0xe4 {
+		return
+	}
+
+	// Magazine number and packet number
+	var h, h1, h2 uint8
+	var errHamming error
+	if h1, errHamming = hamming84Decode(i[*offset]); errHamming != nil {
+		return
+	}
+	if h2, errHamming = hamming84Decode(i[*offset+1]); errHamming != nil {
+		return
+	}
+	h = h2<<4 | h1
+	u.magazineNumber = h & 0x7
+	if u.magazineNumber == 0 {
+		u.magazineNumber = 8
+	}
+	u.packetNumber = h >> 3
+	*offset += 2
+
+	// Designation code
+	if u.packetNumber > 25 {
+		// The data unit is too short to hold the designation code
+		if *offset >= offsetEnd {
+			return
+		}
+		if u.designationCode, errHamming = hamming84Decode(i[*offset]); errHamming != nil {
+			return
+		}
+		*offset += 1
+	}
+
+	// Data
+	u.data = i[*offset:offsetEnd]
+
+	// Append data unit
+	d.units = append(d.units, u)
+}
+
+// teletextPESDataType returns the teletext PES data type based on the data identifier
+func teletextPESDataType(dataIdentifier uint8) string {
+	switch {
+	case dataIdentifier >= 0x10 && dataIdentifier <= 0x1f:
+		return teletextPESDataTypeEBU
+	}
+	return teletextPESDataTypeUnknown
+}
+
+// hamming84Decode decodes a Hamming 8/4 encoded byte and returns its 4 data bits as d4 d3 d2 d1.
+// The bits of i are read, from the most to the least significant one, as p1 d1 p2 d2 p3 d3 p4 d4.
+// An error is returned when at least one of the A, B and C tests fails while the D test passes. Otherwise
+// the failing tests designate the incorrect bit, which is flipped back if it is a data bit.
+func hamming84Decode(i byte) (o uint8, err error) {
+	p1, d1, p2, d2, p3, d3, p4, d4 := i>>7&0x1, i>>6&0x1, i>>5&0x1, i>>4&0x1, i>>3&0x1, i>>2&0x1, i>>1&0x1, i&0x1
+	testA := p1^d1^d3^d4 > 0
+	testB := d1^p2^d2^d4 > 0
+	testC := d1^d2^p3^d3 > 0
+	testD := p1^d1^p2^d2^p3^d3^p4^d4 > 0
+	if testA && testB && testC {
+		// p4 may be incorrect
+	} else if testD && (!testA || !testB || !testC) {
+		err = fmt.Errorf("hamming 8/4 decode of %.8b failed", i)
+		return
+	} else {
+		if !testA && testB && testC {
+			// p1 is incorrect
+		} else if testA && !testB && testC {
+			// p2 is incorrect
+		} else if testA && testB && !testC {
+			// p3 is incorrect
+		} else if !testA && !testB && testC {
+			// d4 is incorrect
+			d4 ^= 1
+		} else if testA && !testB && !testC {
+			// d2 is incorrect
+			d2 ^= 1
+		} else if !testA && testB && !testC {
+			// d3 is incorrect
+			d3 ^= 1
+		} else {
+			// d1 is incorrect
+			d1 ^= 1
+		}
+	}
+	o = uint8(d4<<3 | d3<<2 | d2<<1 | d1)
 	return
 }
diff --git a/teletext_test.go b/teletext_test.go
new file mode 100644
index 0000000..58962f5
--- /dev/null
+++ b/teletext_test.go
@@ -0,0 +1,89 @@
+package astisub
+
+import (
+	"math/bits"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestTeletextPESDataType(t *testing.T) {
+	m := make(map[int]string)
+	for i := 0; i <= 0xff; i++ {
+		if dt := teletextPESDataType(uint8(i)); dt != teletextPESDataTypeUnknown {
+			m[i] = dt
+		}
+	}
+	assert.Equal(t, map[int]string{19: teletextPESDataTypeEBU, 20: teletextPESDataTypeEBU, 21: teletextPESDataTypeEBU, 26: teletextPESDataTypeEBU, 28: teletextPESDataTypeEBU, 17: teletextPESDataTypeEBU, 27: teletextPESDataTypeEBU, 31: teletextPESDataTypeEBU, 16: teletextPESDataTypeEBU, 18: teletextPESDataTypeEBU, 23: teletextPESDataTypeEBU, 29: teletextPESDataTypeEBU, 22: teletextPESDataTypeEBU, 24: teletextPESDataTypeEBU, 25: teletextPESDataTypeEBU, 30: teletextPESDataTypeEBU}, m)
+}
+
+func TestParseTeletextPESData(t *testing.T) {
+	// Empty data
+	d, err := parseTeletextPESData(nil)
+	assert.Error(t, err)
+	assert.Nil(t, d)
+
+	// Truncated or too short data units are skipped
+	for _, b := range [][]byte{
+		{0x10},
+		{0x10, 0x03},
+		{0x10, 0x03, 0x00},
+		{0x10, 0x03, 0x2c, 0x28, 0xe4},
+		{0x10, 0x03, 0x03, 0x28, 0xe4, 0xa8},
+		{0x10, 0x03, 0x04, 0x28, 0xe4, 0x57, 0x57},
+	} {
+		d, err = parseTeletextPESData(b)
+		assert.NoError(t, err)
+		assert.Equal(t, &teletextPESData{dataIdentifier: 0x10}, d)
+	}
+
+	// Other data units are skipped, valid subtitle data units are parsed
+	d, err = parseTeletextPESData([]byte{0x10, 0x02, 0x01, 0xff, 0x03, 0x06, 0x28, 0xe4, 0xa8, 0xa8, 0x01, 0x02})
+	assert.NoError(t, err)
+	assert.Equal(t, &teletextPESData{
+		dataIdentifier: 0x10,
+		units: []*teletextPESDataUnit{{
+			data:           []byte{0x01, 0x02},
+			fieldParity:    true,
+			framingCode:    0xe4,
+			id:             0x3,
+			length:         0x6,
+			lineOffset:     0x8,
+			magazineNumber: 8,
+		}},
+	}, d)
+}
+
+// hamming84Mapping maps a byte, once its bits are reversed, to the value it is expected to be decoded to.
+// 0xff means the decoding is expected to fail.
+var hamming84Mapping = []uint8{
+	0x01, 0xff, 0x01, 0x01, 0xff, 0x00, 0x01, 0xff, 0xff, 0x02, 0x01, 0xff, 0x0a, 0xff, 0xff, 0x07,
+	0xff, 0x00, 0x01, 0xff, 0x00, 0x00, 0xff, 0x00, 0x06, 0xff, 0xff, 0x0b, 0xff, 0x00, 0x03, 0xff,
+	0xff, 0x0c, 0x01, 0xff, 0x04, 0xff, 0xff, 0x07, 0x06, 0xff, 0xff, 0x07, 0xff, 0x07, 0x07, 0x07,
+	0x06, 0xff, 0xff, 0x05, 0xff, 0x00, 0x0d, 0xff, 0x06, 0x06, 0x06, 0xff, 0x06, 0xff, 0xff, 0x07,
+	0xff, 0x02, 0x01, 0xff, 0x04, 0xff, 0xff, 0x09, 0x02, 0x02, 0xff, 0x02, 0xff, 0x02, 0x03, 0xff,
+	0x08, 0xff, 0xff, 0x05, 0xff, 0x00, 0x03, 0xff, 0xff, 0x02, 0x03, 0xff, 0x03, 0xff, 0x03, 0x03,
+	0x04, 0xff, 0xff, 0x05, 0x04, 0x04, 0x04, 0xff, 0xff, 0x02, 0x0f, 0xff, 0x04, 0xff, 0xff, 0x07,
+	0xff, 0x05, 0x05, 0x05, 0x04, 0xff, 0xff, 0x05, 0x06, 0xff, 0xff, 0x05, 0xff, 0x0e, 0x03, 0xff,
+	0xff, 0x0c, 0x01, 0xff, 0x0a, 0xff, 0xff, 0x09, 0x0a, 0xff, 0xff, 0x0b, 0x0a, 0x0a, 0x0a, 0xff,
+	0x08, 0xff, 0xff, 0x0b, 0xff, 0x00, 0x0d, 0xff, 0xff, 0x0b, 0x0b, 0x0b, 0x0a, 0xff, 0xff, 0x0b,
+	0x0c, 0x0c, 0xff, 0x0c, 0xff, 0x0c, 0x0d, 0xff, 0xff, 0x0c, 0x0f, 0xff, 0x0a, 0xff, 0xff, 0x07,
+	0xff, 0x0c, 0x0d, 0xff, 0x0d, 0xff, 0x0d, 0x0d, 0x06, 0xff, 0xff, 0x0b, 0xff, 0x0e, 0x0d, 0xff,
+	0x08, 0xff, 0xff, 0x09, 0xff, 0x09, 0x09, 0x09, 0xff, 0x02, 0x0f, 0xff, 0x0a, 0xff, 0xff, 0x09,
+	0x08, 0x08, 0x08, 0xff, 0x08, 0xff, 0xff, 0x09, 0x08, 0xff, 0xff, 0x0b, 0xff, 0x0e, 0x03, 0xff,
+	0xff, 0x0c, 0x0f, 0xff, 0x04, 0xff, 0xff, 0x09, 0x0f, 0xff, 0x0f, 0x0f, 0xff, 0x0e, 0x0f, 0xff,
+	0x08, 0xff, 0xff, 0x05, 0xff, 0x0e, 0x0d, 0xff, 0xff, 0x0e, 0x0f, 0xff, 0x0e, 0x0e, 0xff, 0x0e,
+}
+
+func TestHamming84(t *testing.T) {
+	for i := 0; i < len(hamming84Mapping); i++ {
+		v, err := hamming84Decode(byte(bits.Reverse8(uint8(i))))
+		e := hamming84Mapping[i]
+		if e == 0xff {
+			assert.Error(t, err)
+		} else {
+			assert.NoError(t, err)
+			assert.Equal(t, e&0x0f, v)
+		}
+	}
+}