Skip to content

Commit

Permalink
Add support for WebVTT (#223)
Browse files Browse the repository at this point in the history
* Add support for WebVTT

Use <https://www.iana.org/assignments/media-types/text/vtt> for magic
numbers.

* Add VTT signatures that include EOF

* Remove pointy brackets around URL

* Simplify Vtt

* delete bytePointer
  • Loading branch information
joksas committed Jan 20, 2022
1 parent 4d3e6db commit 568fc39
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 6 deletions.
25 changes: 25 additions & 0 deletions internal/magic/text.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,3 +348,28 @@ func Srt(in []byte, _ uint32) bool {
// A third line must exist and not be empty. This is the actual subtitle text.
return s.Scan() && len(s.Bytes()) != 0
}

// Vtt reports whether raw carries the signature of a Web Video Text Tracks
// (WebVTT) file: an optional UTF-8 byte order mark, the ASCII string "WEBVTT",
// and then either the end of the input or one of line feed, carriage return,
// space or horizontal tab. The limit argument is not consulted. See
// https://www.iana.org/assignments/media-types/text/vtt.
func Vtt(raw []byte, limit uint32) bool {
	const signature = "WEBVTT"

	// Drop a single leading UTF-8 BOM, if present, so that both variants of
	// the signature are handled by the same checks below.
	body := bytes.TrimPrefix(raw, []byte{0xEF, 0xBB, 0xBF})
	if !bytes.HasPrefix(body, []byte(signature)) {
		return false
	}

	// The signature alone, with nothing after it, is a match.
	if len(body) == len(signature) {
		return true
	}

	// Otherwise the byte right after the signature must be one of the
	// accepted separators; anything else (e.g. "WEBVTTX") is rejected.
	switch body[len(signature)] {
	case '\n', '\r', ' ', '\t':
		return true
	default:
		return false
	}
}
4 changes: 4 additions & 0 deletions mimetype_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ var files = map[string]string{
"vcf.dos.vcf": "text/vcard",
"vcf.vcf": "text/vcard",
"voc.voc": "audio/x-unknown",
"vtt.vtt": "text/vtt",
"vtt.space.vtt": "text/vtt",
"vtt.tab.vtt": "text/vtt",
"vtt.eof.vtt": "text/vtt",
"warc.warc": "application/warc",
"wasm.wasm": "application/wasm",
"wav.wav": "audio/wav",
Expand Down
3 changes: 2 additions & 1 deletion supported_mimes.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## 167 Supported MIME types
## 168 Supported MIME types
This file is automatically generated when running tests. Do not edit manually.

Extension | MIME type | Aliases
Expand Down Expand Up @@ -170,3 +170,4 @@ Extension | MIME type | Aliases
**.vcf** | text/vcard | -
**.ics** | text/calendar | -
**.warc** | application/warc | -
**.vtt** | text/vtt | -
1 change: 1 addition & 0 deletions testdata/vtt.eof.vtt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
WEBVTT
1 change: 1 addition & 0 deletions testdata/vtt.space.vtt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
WEBVTT
1 change: 1 addition & 0 deletions testdata/vtt.tab.vtt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
WEBVTT
45 changes: 45 additions & 0 deletions testdata/vtt.vtt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
WEBVTT
Kind: captions
Language: en
00:09.000 --> 00:11.000
<v Roger Bingham>We are in New York City

00:11.000 --> 00:13.000
<v Roger Bingham>We are in New York City

00:13.000 --> 00:16.000
<v Roger Bingham>We're actually at the Lucern Hotel, just down the street

00:16.000 --> 00:18.000
<v Roger Bingham>from the American Museum of Natural History

00:18.000 --> 00:20.000
<v Roger Bingham>And with me is Neil deGrasse Tyson

00:20.000 --> 00:22.000
<v Roger Bingham>Astrophysicist, Director of the Hayden Planetarium

00:22.000 --> 00:24.000
<v Roger Bingham>at the AMNH.

00:24.000 --> 00:26.000
<v Roger Bingham>Thank you for walking down here.

00:27.000 --> 00:30.000
<v Roger Bingham>And I want to do a follow-up on the last conversation we did.

00:30.000 --> 00:31.500 align:end size:50%
<v Roger Bingham>When we e-mailed—

00:30.500 --> 00:32.500 align:start size:50%
<v Neil deGrasse Tyson>Didn't we talk about enough in that conversation?

00:32.000 --> 00:35.500 align:end size:50%
<v Roger Bingham>No! No no no no; 'cos 'cos obviously 'cos

00:32.500 --> 00:33.500 align:start size:50%
<v Neil deGrasse Tyson><i>Laughs</i>

00:35.500 --> 00:38.000
<v Roger Bingham>You know I'm so excited my glasses are falling off here.
11 changes: 6 additions & 5 deletions tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ var (
alias("application/x-ogg")
oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio)
oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo)
text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc)
text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt)
xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2)
json = newMIME("application/json", ".json", magic.JSON, geoJSON, har)
har = newMIME("application/json", ".har", magic.HAR)
Expand All @@ -87,10 +87,11 @@ var (
html = newMIME("text/html", ".html", magic.HTML)
php = newMIME("text/x-php", ".php", magic.Php)
rtf = newMIME("text/rtf", ".rtf", magic.Rtf)
srt = newMIME("text/x-subrip", ".srt", magic.Srt).
alias("text/x-srt")
js = newMIME("application/javascript", ".js", magic.Js).
alias("application/x-javascript", "text/javascript")
js = newMIME("application/javascript", ".js", magic.Js).
alias("application/x-javascript", "text/javascript")
srt = newMIME("text/x-subrip", ".srt", magic.Srt).
alias("text/x-srt")
vtt = newMIME("text/vtt", ".vtt", magic.Vtt)
lua = newMIME("text/x-lua", ".lua", magic.Lua)
perl = newMIME("text/x-perl", ".pl", magic.Perl)
python = newMIME("application/x-python", ".py", magic.Python)
Expand Down

0 comments on commit 568fc39

Please sign in to comment.