From c9aeb4fcf479dadb8e48c9877546cb4fa5283313 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 22 Dec 2021 11:19:17 +0000 Subject: [PATCH 01/16] Bump actions/setup-go from 2.1.4 to 2.1.5 Bumps [actions/setup-go](https://github.com/actions/setup-go) from 2.1.4 to 2.1.5. - [Release notes](https://github.com/actions/setup-go/releases) - [Commits](https://github.com/actions/setup-go/compare/v2.1.4...v2.1.5) --- updated-dependencies: - dependency-name: actions/setup-go dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/go.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index a4d5044..dfa6dd7 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Go - uses: actions/setup-go@v2.1.4 + uses: actions/setup-go@v2.1.5 with: go-version: 1.16 - name: Checkout code @@ -27,7 +27,7 @@ jobs: steps: - name: Install Go if: success() - uses: actions/setup-go@v2.1.4 + uses: actions/setup-go@v2.1.5 with: go-version: ${{ matrix.go-version }} - name: Checkout code @@ -40,7 +40,7 @@ jobs: steps: - name: Install Go if: success() - uses: actions/setup-go@v2.1.4 + uses: actions/setup-go@v2.1.5 with: go-version: 1.16 - name: Checkout code From e7d77264325828322a63003c21ddae5093cde4d2 Mon Sep 17 00:00:00 2001 From: Dovydas Joksas Date: Mon, 17 Jan 2022 20:19:24 +0000 Subject: [PATCH 02/16] Add support for SubRip (#232) * Add support for SubRip file format * Handle Windows newlines In Windows, a newline is represented as `\r\n`. 
* Add benchmark for regex vs time.Parse .srt detection * Make Srt timestamp detection more restrictive * Remove unused Srt functions * Use a constant for Srt timestamp layout * Remove redundant comments in Srt * make `Srt` work with Go <1.17 * Ensure correct Srt timestamp length before processing Co-authored-by: Gabriel Vasile * Remove `application/srt` alias from Srt Co-authored-by: Gabriel Vasile --- internal/magic/magic_test.go | 12 +++++ internal/magic/text.go | 51 +++++++++++++++++++++ mimetype_test.go | 88 +++++++++++++++++++----------------- supported_mimes.md | 3 +- testdata/not.srt.2.txt | 20 ++++++++ testdata/not.srt.txt | 20 ++++++++ testdata/srt.srt | 20 ++++++++ tree.go | 8 ++-- 8 files changed, 177 insertions(+), 45 deletions(-) create mode 100644 testdata/not.srt.2.txt create mode 100644 testdata/not.srt.txt create mode 100644 testdata/srt.srt diff --git a/internal/magic/magic_test.go b/internal/magic/magic_test.go index d0a5d8a..04490a6 100644 --- a/internal/magic/magic_test.go +++ b/internal/magic/magic_test.go @@ -112,3 +112,15 @@ func TestDropLastLine(t *testing.T) { } } } + +func BenchmarkSrt(b *testing.B) { + const subtitle = `1 +00:02:16,612 --> 00:02:19,376 +Senator, we're making +our final approach into Coruscant. + +` + for i := 0; i < b.N; i++ { + Srt([]byte(subtitle), 0) + } +} diff --git a/internal/magic/text.go b/internal/magic/text.go index b807dc5..d55c6af 100644 --- a/internal/magic/text.go +++ b/internal/magic/text.go @@ -3,6 +3,8 @@ package magic import ( "bufio" "bytes" + "strings" + "time" "github.com/gabriel-vasile/mimetype/internal/charset" "github.com/gabriel-vasile/mimetype/internal/json" @@ -297,3 +299,52 @@ func HAR(raw []byte, limit uint32) bool { func Svg(raw []byte, limit uint32) bool { return bytes.Contains(raw, []byte(" 00:02:19,376) limits secondLine + // length to exactly 29 characters. 
+ if len(secondLine) != 29 { + return false + } + // Decimal separator of fractional seconds in the timestamps must be a + // comma, not a period. + if strings.Contains(secondLine, ".") { + return false + } + // For Go <1.17, comma is not recognised as a decimal separator by `time.Parse`. + secondLine = strings.ReplaceAll(secondLine, ",", ".") + // Second line must be a time range. + ts := strings.Split(secondLine, " --> ") + if len(ts) != 2 { + return false + } + const layout = "15:04:05.000" + t0, err := time.Parse(layout, ts[0]) + if err != nil { + return false + } + t1, err := time.Parse(layout, ts[1]) + if err != nil { + return false + } + if t0.After(t1) { + return false + } + + // A third line must exist and not be empty. This is the actual subtitle text. + return s.Scan() && len(s.Bytes()) != 0 +} diff --git a/mimetype_test.go b/mimetype_test.go index cb70160..d2ef7df 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -171,47 +171,53 @@ var files = map[string]string{ "shx.shx": "application/octet-stream", "so.so": "application/x-sharedlib", "sqlite.sqlite": "application/vnd.sqlite3", - "svg.1.svg": "image/svg+xml", - "svg.svg": "image/svg+xml", - "swf.swf": "application/x-shockwave-flash", - "tar.tar": "application/x-tar", - "tcl.tcl": "text/x-tcl", - "tcx.tcx": "application/vnd.garmin.tcx+xml", - "tiff.tiff": "image/tiff", - "torrent.torrent": "application/x-bittorrent", - "tsv.tsv": "text/tab-separated-values", - "ttf.ttf": "font/ttf", - "tzfile": "application/tzif", - "utf16bebom.txt": "text/plain; charset=utf-16be", - "utf16lebom.txt": "text/plain; charset=utf-16le", - "utf32bebom.txt": "text/plain; charset=utf-32be", - "utf32lebom.txt": "text/plain; charset=utf-32le", - "utf8.txt": "text/plain; charset=utf-8", - "utf8ctrlchars": "application/octet-stream", - "vcf.dos.vcf": "text/vcard", - "vcf.vcf": "text/vcard", - "voc.voc": "audio/x-unknown", - "warc.warc": "application/warc", - "wasm.wasm": "application/wasm", - "wav.wav": "audio/wav", - 
"webm.webm": "video/webm", - "webp.webp": "image/webp", - "woff.woff": "font/woff", - "woff2.woff2": "font/woff2", - "x3d.x3d": "model/x3d+xml", - "xar.xar": "application/x-xar", - "xcf.xcf": "image/x-xcf", - "xfdf.xfdf": "application/vnd.adobe.xfdf", - "xlf.xlf": "application/x-xliff+xml", - "xls.xls": "application/vnd.ms-excel", - "xlsx.1.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xlsx.2.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xlsx.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xml.xml": "text/xml; charset=utf-8", - "xml.withbr.xml": "text/xml; charset=utf-8", - "xz.xz": "application/x-xz", - "zip.zip": "application/zip", - "zst.zst": "application/zstd", + "srt.srt": "text/x-subrip", + // not.srt.txt uses periods instead of commas for the decimal separators of + // the timestamps. + "not.srt.txt": "text/plain; charset=utf-8", + // not.srt.2.txt does not specify milliseconds. + "not.srt.2.txt": "text/plain; charset=utf-8", + "svg.1.svg": "image/svg+xml", + "svg.svg": "image/svg+xml", + "swf.swf": "application/x-shockwave-flash", + "tar.tar": "application/x-tar", + "tcl.tcl": "text/x-tcl", + "tcx.tcx": "application/vnd.garmin.tcx+xml", + "tiff.tiff": "image/tiff", + "torrent.torrent": "application/x-bittorrent", + "tsv.tsv": "text/tab-separated-values", + "ttf.ttf": "font/ttf", + "tzfile": "application/tzif", + "utf16bebom.txt": "text/plain; charset=utf-16be", + "utf16lebom.txt": "text/plain; charset=utf-16le", + "utf32bebom.txt": "text/plain; charset=utf-32be", + "utf32lebom.txt": "text/plain; charset=utf-32le", + "utf8.txt": "text/plain; charset=utf-8", + "utf8ctrlchars": "application/octet-stream", + "vcf.dos.vcf": "text/vcard", + "vcf.vcf": "text/vcard", + "voc.voc": "audio/x-unknown", + "warc.warc": "application/warc", + "wasm.wasm": "application/wasm", + "wav.wav": "audio/wav", + "webm.webm": "video/webm", + "webp.webp": "image/webp", + "woff.woff": 
"font/woff", + "woff2.woff2": "font/woff2", + "x3d.x3d": "model/x3d+xml", + "xar.xar": "application/x-xar", + "xcf.xcf": "image/x-xcf", + "xfdf.xfdf": "application/vnd.adobe.xfdf", + "xlf.xlf": "application/x-xliff+xml", + "xls.xls": "application/vnd.ms-excel", + "xlsx.1.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xlsx.2.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xlsx.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xml.xml": "text/xml; charset=utf-8", + "xml.withbr.xml": "text/xml; charset=utf-8", + "xz.xz": "application/x-xz", + "zip.zip": "application/zip", + "zst.zst": "application/zstd", } func TestDetect(t *testing.T) { diff --git a/supported_mimes.md b/supported_mimes.md index 55ccee6..b966a4f 100644 --- a/supported_mimes.md +++ b/supported_mimes.md @@ -1,4 +1,4 @@ -## 166 Supported MIME types +## 167 Supported MIME types This file is automatically generated when running tests. Do not edit manually. Extension | MIME type | Aliases @@ -163,6 +163,7 @@ Extension | MIME type | Aliases **.har** | application/json | - **.ndjson** | application/x-ndjson | - **.rtf** | text/rtf | - +**.srt** | text/x-subrip | text/x-srt **.tcl** | text/x-tcl | application/x-tcl **.csv** | text/csv | - **.tsv** | text/tab-separated-values | - diff --git a/testdata/not.srt.2.txt b/testdata/not.srt.2.txt new file mode 100644 index 0000000..78bebbc --- /dev/null +++ b/testdata/not.srt.2.txt @@ -0,0 +1,20 @@ +1 +00:02:16 --> 00:02:19 +Senator, we're making +our final approach into Coruscant. + +2 +00:02:19 --> 00:02:21 +Very good, Lieutenant. + +3 +00:03:13 --> 00:03:15 +We made it. + +4 +00:03:18 --> 00:03:20 +I guess I was wrong. + +5 +00:03:20 --> 00:03:22 +There was no danger at all. 
diff --git a/testdata/not.srt.txt b/testdata/not.srt.txt new file mode 100644 index 0000000..338c9ee --- /dev/null +++ b/testdata/not.srt.txt @@ -0,0 +1,20 @@ +1 +00:02:16.612 --> 00:02:19.376 +Senator, we're making +our final approach into Coruscant. + +2 +00:02:19.482 --> 00:02:21.609 +Very good, Lieutenant. + +3 +00:03:13.336 --> 00:03:15.167 +We made it. + +4 +00:03:18.608 --> 00:03:20.371 +I guess I was wrong. + +5 +00:03:20.476 --> 00:03:22.671 +There was no danger at all. diff --git a/testdata/srt.srt b/testdata/srt.srt new file mode 100644 index 0000000..e16f344 --- /dev/null +++ b/testdata/srt.srt @@ -0,0 +1,20 @@ +1 +00:02:16,612 --> 00:02:19,376 +Senator, we're making +our final approach into Coruscant. + +2 +00:02:19,482 --> 00:02:21,609 +Very good, Lieutenant. + +3 +00:03:13,336 --> 00:03:15,167 +We made it. + +4 +00:03:18,608 --> 00:03:20,371 +I guess I was wrong. + +5 +00:03:20,476 --> 00:03:22,671 +There was no danger at all. diff --git a/tree.go b/tree.go index dfd3227..6f8578e 100644 --- a/tree.go +++ b/tree.go @@ -76,7 +76,7 @@ var ( alias("application/x-ogg") oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio) oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo) - text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, tcl, csv, tsv, vCard, iCalendar, warc) + text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc) xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2) json = newMIME("application/json", ".json", magic.JSON, geoJSON, har) har = newMIME("application/json", ".har", magic.HAR) @@ -87,8 +87,10 @@ var ( html = newMIME("text/html", ".html", magic.HTML) php = newMIME("text/x-php", ".php", magic.Php) rtf = newMIME("text/rtf", ".rtf", magic.Rtf) - js = newMIME("application/javascript", ".js", magic.Js). 
- alias("application/x-javascript", "text/javascript") + srt = newMIME("text/x-subrip", ".srt", magic.Srt). + alias("text/x-srt") + js = newMIME("application/javascript", ".js", magic.Js). + alias("application/x-javascript", "text/javascript") lua = newMIME("text/x-lua", ".lua", magic.Lua) perl = newMIME("text/x-perl", ".pl", magic.Perl) python = newMIME("application/x-python", ".py", magic.Python) From 49b96dc12e8b7fb3caa9db8b7fd8a488eaa68e0e Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Sun, 16 Jan 2022 23:33:23 +0200 Subject: [PATCH 03/16] Pin dependency versions in github actions --- .github/workflows/codeql.yml | 9 +++------ .github/workflows/go.yml | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index c13a885..8cd5876 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -5,8 +5,6 @@ on: branches: [master] pull_request: branches: [master] - schedule: - - cron: "22 11 * * *" jobs: CodeQL-Build: @@ -14,14 +12,13 @@ jobs: steps: - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v2.4.0 - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v1.0.0 with: languages: go queries: security-and-quality - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 - + uses: github/codeql-action/analyze@v1.0.0 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index a4d5044..5d42c70 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -16,7 +16,7 @@ jobs: - name: Run linters uses: golangci/golangci-lint-action@v2.5.2 with: - version: v1.29 + version: "v1.37.1" test: strategy: From 5693b05be1659f2981ea415c68dd75679fbf3998 Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Sun, 16 Jan 2022 23:53:02 +0200 Subject: [PATCH 04/16] Use 1.12 and latest for go test version in github actions --- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 5d42c70..9c423d2 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -21,7 +21,7 @@ jobs: test: strategy: matrix: - go-version: [1.16] + go-version: ["1.12.0", "latest"] platform: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: From 1498ae5f9a6bdedda39f8328b191b676d66d622c Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Sun, 16 Jan 2022 23:53:47 +0200 Subject: [PATCH 05/16] Use ioutil.ReadAll instead io.ReadAll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit io.ReadAll was introduced in 1.16. This change makes the package compatible with versions down to 1.12. --- internal/magic/magic_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/magic/magic_test.go b/internal/magic/magic_test.go index 04490a6..abf5c55 100644 --- a/internal/magic/magic_test.go +++ b/internal/magic/magic_test.go @@ -1,7 +1,7 @@ package magic import ( - "io" + "io/ioutil" "testing" ) @@ -106,7 +106,7 @@ func TestDropLastLine(t *testing.T) { } for i, tt := range dropTests { gotR := dropLastLine([]byte(tt.raw), tt.cutAt) - got, _ := io.ReadAll(gotR) + got, _ := ioutil.ReadAll(gotR) if got := string(got); got != tt.res { t.Errorf("dropLastLine %d error: expected %q; got %q", i, tt.res, got) } From 993bbbf18d9d9eb0ba61ddb63c0b33e1aff242da Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Mon, 17 Jan 2022 00:02:20 +0200 Subject: [PATCH 06/16] replace latest with caret version in gh actions actions/go-versions does not support latest as a valid version. Instead, use a caret version equivalent to latest.
--- .github/workflows/go.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 9c423d2..9ad023f 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -21,7 +21,7 @@ jobs: test: strategy: matrix: - go-version: ["1.12.0", "latest"] + go-version: ["1.12.0", "^1.17.6"] platform: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: From 4d3e6db0168c1f0de30073976bbf023a4c28f26f Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Mon, 17 Jan 2022 00:08:14 +0200 Subject: [PATCH 07/16] prevent gh action tests run twice from: once for PR event and once for push event to: for PRs and pushes to master --- .github/workflows/go.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 9ad023f..11eb3a0 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -1,5 +1,7 @@ on: push: + branches: + - master pull_request: name: run tests From 568fc39ed45d666736d23b0286046612fed5014c Mon Sep 17 00:00:00 2001 From: Dovydas Joksas Date: Thu, 20 Jan 2022 16:28:09 +0000 Subject: [PATCH 08/16] Add support for WebVTT (#223) * Add support for WebVTT Use https://www.iana.org/assignments/media-types/text/vtt for magic numbers.
* Add VTT signatures that include EOF * Remove pointy brackets around URL * Simplify Vtt * delete bytePointer --- internal/magic/text.go | 25 +++++++++++++++++++++++ mimetype_test.go | 4 ++++ supported_mimes.md | 3 ++- testdata/vtt.eof.vtt | 1 + testdata/vtt.space.vtt | 1 + testdata/vtt.tab.vtt | 1 + testdata/vtt.vtt | 45 ++++++++++++++++++++++++++++++++++++++++++ tree.go | 11 ++++++----- 8 files changed, 85 insertions(+), 6 deletions(-) create mode 100644 testdata/vtt.eof.vtt create mode 100644 testdata/vtt.space.vtt create mode 100644 testdata/vtt.tab.vtt create mode 100644 testdata/vtt.vtt diff --git a/internal/magic/text.go b/internal/magic/text.go index d55c6af..e2a03ca 100644 --- a/internal/magic/text.go +++ b/internal/magic/text.go @@ -348,3 +348,28 @@ func Srt(in []byte, _ uint32) bool { // A third line must exist and not be empty. This is the actual subtitle text. return s.Scan() && len(s.Bytes()) != 0 } + +// Vtt matches a Web Video Text Tracks (WebVTT) file. See +// https://www.iana.org/assignments/media-types/text/vtt. +func Vtt(raw []byte, limit uint32) bool { + // Prefix match. + prefixes := [][]byte{ + {0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x0A}, // UTF-8 BOM, "WEBVTT" and a line feed + {0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x0D}, // UTF-8 BOM, "WEBVTT" and a carriage return + {0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x20}, // UTF-8 BOM, "WEBVTT" and a space + {0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x09}, // UTF-8 BOM, "WEBVTT" and a horizontal tab + {0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x0A}, // "WEBVTT" and a line feed + {0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x0D}, // "WEBVTT" and a carriage return + {0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x20}, // "WEBVTT" and a space + {0x57, 0x45, 0x42, 0x56, 0x54, 0x54, 0x09}, // "WEBVTT" and a horizontal tab + } + for _, p := range prefixes { + if bytes.HasPrefix(raw, p) { + return true + } + } + + // Exact match. 
+ return bytes.Equal(raw, []byte{0xEF, 0xBB, 0xBF, 0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) || // UTF-8 BOM and "WEBVTT" + bytes.Equal(raw, []byte{0x57, 0x45, 0x42, 0x56, 0x54, 0x54}) // "WEBVTT" +} diff --git a/mimetype_test.go b/mimetype_test.go index d2ef7df..5b2aacb 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -197,6 +197,10 @@ var files = map[string]string{ "vcf.dos.vcf": "text/vcard", "vcf.vcf": "text/vcard", "voc.voc": "audio/x-unknown", + "vtt.vtt": "text/vtt", + "vtt.space.vtt": "text/vtt", + "vtt.tab.vtt": "text/vtt", + "vtt.eof.vtt": "text/vtt", "warc.warc": "application/warc", "wasm.wasm": "application/wasm", "wav.wav": "audio/wav", diff --git a/supported_mimes.md b/supported_mimes.md index b966a4f..0b13521 100644 --- a/supported_mimes.md +++ b/supported_mimes.md @@ -1,4 +1,4 @@ -## 167 Supported MIME types +## 168 Supported MIME types This file is automatically generated when running tests. Do not edit manually. Extension | MIME type | Aliases @@ -170,3 +170,4 @@ Extension | MIME type | Aliases **.vcf** | text/vcard | - **.ics** | text/calendar | - **.warc** | application/warc | - +**.vtt** | text/vtt | - diff --git a/testdata/vtt.eof.vtt b/testdata/vtt.eof.vtt new file mode 100644 index 0000000..af1827d --- /dev/null +++ b/testdata/vtt.eof.vtt @@ -0,0 +1 @@ +WEBVTT \ No newline at end of file diff --git a/testdata/vtt.space.vtt b/testdata/vtt.space.vtt new file mode 100644 index 0000000..6abbf42 --- /dev/null +++ b/testdata/vtt.space.vtt @@ -0,0 +1 @@ +WEBVTT \ No newline at end of file diff --git a/testdata/vtt.tab.vtt b/testdata/vtt.tab.vtt new file mode 100644 index 0000000..e28ec9b --- /dev/null +++ b/testdata/vtt.tab.vtt @@ -0,0 +1 @@ +WEBVTT \ No newline at end of file diff --git a/testdata/vtt.vtt b/testdata/vtt.vtt new file mode 100644 index 0000000..22cc7ae --- /dev/null +++ b/testdata/vtt.vtt @@ -0,0 +1,45 @@ +WEBVTT +Kind: captions +Language: en + +00:09.000 --> 00:11.000 +We are in New York City + +00:11.000 --> 00:13.000 +We are in 
New York City + +00:13.000 --> 00:16.000 +We're actually at the Lucern Hotel, just down the street + +00:16.000 --> 00:18.000 +from the American Museum of Natural History + +00:18.000 --> 00:20.000 +And with me is Neil deGrasse Tyson + +00:20.000 --> 00:22.000 +Astrophysicist, Director of the Hayden Planetarium + +00:22.000 --> 00:24.000 +at the AMNH. + +00:24.000 --> 00:26.000 +Thank you for walking down here. + +00:27.000 --> 00:30.000 +And I want to do a follow-up on the last conversation we did. + +00:30.000 --> 00:31.500 align:end size:50% +When we e-mailed— + +00:30.500 --> 00:32.500 align:start size:50% +Didn't we talk about enough in that conversation? + +00:32.000 --> 00:35.500 align:end size:50% +No! No no no no; 'cos 'cos obviously 'cos + +00:32.500 --> 00:33.500 align:start size:50% +Laughs + +00:35.500 --> 00:38.000 +You know I'm so excited my glasses are falling off here. diff --git a/tree.go b/tree.go index 6f8578e..f98650b 100644 --- a/tree.go +++ b/tree.go @@ -76,7 +76,7 @@ var ( alias("application/x-ogg") oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio) oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo) - text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc) + text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc, vtt) xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2) json = newMIME("application/json", ".json", magic.JSON, geoJSON, har) har = newMIME("application/json", ".har", magic.HAR) @@ -87,10 +87,11 @@ var ( html = newMIME("text/html", ".html", magic.HTML) php = newMIME("text/x-php", ".php", magic.Php) rtf = newMIME("text/rtf", ".rtf", magic.Rtf) - srt = newMIME("text/x-subrip", ".srt", magic.Srt). 
- alias("text/x-srt") - js = newMIME("application/javascript", ".js", magic.Js). - alias("application/x-javascript", "text/javascript") + js = newMIME("application/javascript", ".js", magic.Js). + alias("application/x-javascript", "text/javascript") + srt = newMIME("text/x-subrip", ".srt", magic.Srt). + alias("text/x-srt") + vtt = newMIME("text/vtt", ".vtt", magic.Vtt) lua = newMIME("text/x-lua", ".lua", magic.Lua) perl = newMIME("text/x-perl", ".pl", magic.Perl) python = newMIME("application/x-python", ".py", magic.Python) From ad1f555d7f68004983512d3881f10598363936f6 Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Sat, 29 Jan 2022 18:00:26 +0200 Subject: [PATCH 09/16] Add missing test case for msi files --- mimetype_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/mimetype_test.go b/mimetype_test.go index 5b2aacb..7bb6aea 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -125,6 +125,7 @@ var files = map[string]string{ "mpeg.mpeg": "video/mpeg", "mqv.mqv": "video/quicktime", "mrc.mrc": "application/marc", + "msi.msi": "application/x-ms-installer", "msg.msg": "application/vnd.ms-outlook", "ndjson.xl.ndjson": "application/x-ndjson", "ndjson.ndjson": "application/x-ndjson", From 859a07a7bb90cab9192837aab467dd6a89c742ae Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Sat, 29 Jan 2022 18:00:51 +0200 Subject: [PATCH 10/16] Fix detection for CFB files version 4. For #231 Previously the offset for the CLSID was searched for using v3 offsets. This commit changes detection to check CFB version in order to choose between v3(512) and v4(4096) offsets. 
https://www.loc.gov/preservation/digital/formats/fdd/fdd000392.shtml --- internal/magic/ms_office.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/internal/magic/ms_office.go b/internal/magic/ms_office.go index 0adee62..ae601f6 100644 --- a/internal/magic/ms_office.go +++ b/internal/magic/ms_office.go @@ -190,15 +190,22 @@ func Msi(raw []byte, limit uint32) bool { // // http://fileformats.archiveteam.org/wiki/Microsoft_Compound_File func matchOleClsid(in []byte, clsid []byte) bool { - if len(in) <= 512 { + // Microsoft Compound files v3 have a sector length of 512, while v4 has 4096. + // Change sector offset depending on file version. + // https://www.loc.gov/preservation/digital/formats/fdd/fdd000392.shtml + sectorLength := 512 + if len(in) < sectorLength { return false } + if in[26] == 0x04 && in[27] == 0x00 { + sectorLength = 4096 + } - // SecID of first sector of the directory stream + // SecID of first sector of the directory stream. firstSecID := int(binary.LittleEndian.Uint32(in[48:52])) - // Expected offset of CLSID for root storage object - clsidOffset := 512*(1+firstSecID) + 80 + // Expected offset of CLSID for root storage object. + clsidOffset := sectorLength*(1+firstSecID) + 80 if len(in) <= clsidOffset+16 { return false From 2345dc1e1cca5dbf025d9fdeaa50a02cc478e5e3 Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Sun, 30 Jan 2022 23:36:26 +0200 Subject: [PATCH 11/16] Fix json detected as dbf when it starts with low ascii values Byte 0x09 represents ninth month in dbf header, but it is also the ascii value of the tab character. This made dbf detection pass for json and txt files happening to contain these low ascii values at the start. This commit improves dbf detection to check for several null bytes in the input. json and txt files never contain null bytes.
--- internal/magic/binary.go | 17 ++++++++++++++--- mimetype_test.go | 1 + testdata/json.lowascii.json | 2 ++ 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 testdata/json.lowascii.json diff --git a/internal/magic/binary.go b/internal/magic/binary.go index feca467..83ba2d0 100644 --- a/internal/magic/binary.go +++ b/internal/magic/binary.go @@ -64,16 +64,27 @@ func MachO(raw []byte, limit uint32) bool { // Dbf matches a dBase file. // https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm func Dbf(raw []byte, limit uint32) bool { - if len(raw) < 4 { + if len(raw) < 68 { return false } - // 3rd and 4th bytes contain the last update month and day of month + // 3rd and 4th bytes contain the last update month and day of month. if !(0 < raw[2] && raw[2] < 13 && 0 < raw[3] && raw[3] < 32) { return false } - // dbf type is dictated by the first byte + // 12, 13, 30, 31 are reserved bytes and always filled with 0x00. + if raw[12] != 0x00 || raw[13] != 0x00 || raw[30] != 0x00 || raw[31] != 0x00 { + return false + } + // Production MDX flag; + // 0x01 if a production .MDX file exists for this table; + // 0x00 if no .MDX file exists. + if raw[28] > 0x01 { + return false + } + + // dbf type is dictated by the first byte. dbfTypes := []byte{ 0x02, 0x03, 0x04, 0x05, 0x30, 0x31, 0x32, 0x42, 0x62, 0x7B, 0x82, 0x83, 0x87, 0x8A, 0x8B, 0x8E, 0xB3, 0xCB, 0xE5, 0xF5, 0xF4, 0xFB, diff --git a/mimetype_test.go b/mimetype_test.go index 7bb6aea..0f6fe12 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -94,6 +94,7 @@ var files = map[string]string{ "xpm.xpm": "image/x-xpixmap", "js.js": "application/javascript", "json.json": "application/json", + "json.lowascii.json": "application/json", // json.{int,float,string}.txt contain a single JSON value. They are valid JSON // documents, but they should not be detected as application/json. This mimics // the behaviour of the file utility and seems the correct thing to do. 
diff --git a/testdata/json.lowascii.json b/testdata/json.lowascii.json new file mode 100644 index 0000000..36f2f4c --- /dev/null +++ b/testdata/json.lowascii.json @@ -0,0 +1,2 @@ +{ + "fixture for issue #": 239} From 20164ff346f7ddccf78d048af49021e27b15512a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 1 Feb 2022 11:17:44 +0000 Subject: [PATCH 12/16] Bump github/codeql-action from 1.0.0 to 1.0.31 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 1.0.0 to 1.0.31. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/v1.0.0...v1.0.31) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 8cd5876..6387e76 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -15,10 +15,10 @@ jobs: uses: actions/checkout@v2.4.0 - name: Initialize CodeQL - uses: github/codeql-action/init@v1.0.0 + uses: github/codeql-action/init@v1.0.31 with: languages: go queries: security-and-quality - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1.0.0 + uses: github/codeql-action/analyze@v1.0.31 From 98e32219d1ec148b0d34184cc4a86e7725b17370 Mon Sep 17 00:00:00 2001 From: Nick van Ravenzwaaij <43145188+n-vr@users.noreply.github.com> Date: Wed, 2 Feb 2022 16:54:06 +0100 Subject: [PATCH 13/16] Improve TzIf format detection (#237) Additional header checks: - Header length - Version - typecnt MUST not be zero --- internal/magic/binary.go | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git 
a/internal/magic/binary.go b/internal/magic/binary.go index 83ba2d0..29bdded 100644 --- a/internal/magic/binary.go +++ b/internal/magic/binary.go @@ -17,8 +17,6 @@ var ( Elf = prefix([]byte{0x7F, 0x45, 0x4C, 0x46}) // Nes matches a Nintendo Entertainment system ROM file. Nes = prefix([]byte{0x4E, 0x45, 0x53, 0x1A}) - // TzIf matches a Time Zone Information Format (TZif) file. - TzIf = prefix([]byte("TZif")) // SWF matches an Adobe Flash swf file. SWF = prefix([]byte("CWS"), []byte("FWS"), []byte("ZWS")) // Torrent has bencoded text in the beginning. @@ -166,3 +164,33 @@ func Marc(raw []byte, limit uint32) bool { // | g l T F | 1 | ... | var Glb = prefix([]byte("\x67\x6C\x54\x46\x02\x00\x00\x00"), []byte("\x67\x6C\x54\x46\x01\x00\x00\x00")) + +// TzIf matches a Time Zone Information Format (TZif) file. +// See more: https://tools.ietf.org/id/draft-murchison-tzdist-tzif-00.html#rfc.section.3 +// Its header structure is shown below: +// +---------------+---+ +// | magic (4) | <-+-- version (1) +// +---------------+---+---------------------------------------+ +// | [unused - reserved for future use] (15) | +// +---------------+---------------+---------------+-----------+ +// | isutccnt (4) | isstdcnt (4) | leapcnt (4) | +// +---------------+---------------+---------------+ +// | timecnt (4) | typecnt (4) | charcnt (4) | +func TzIf(raw []byte, limit uint32) bool { + // File is at least 44 bytes (header size). + if len(raw) < 44 { + return false + } + + if !bytes.HasPrefix(raw, []byte("TZif")) { + return false + } + + // Field "typecnt" MUST not be zero. + if binary.BigEndian.Uint32(raw[36:40]) == 0 { + return false + } + + // Version has to be NUL (0x00), '2' (0x32) or '3' (0x33). 
+ return raw[4] == 0x00 || raw[4] == 0x32 || raw[4] == 0x33 +} From 04bab86448442e09b59c650c0ca343da57b33f9e Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Wed, 2 Feb 2022 19:49:17 +0200 Subject: [PATCH 14/16] Use CLSID for Office 97-2003 detection Previously, all OLE storage files which were not identified as a more specific format were defaulting to doc. This commit makes doc detection use CLSID UUIDs, similar to what ppt and xls do. --- internal/magic/ms_office.go | 26 ++++++++++++++++++-------- mimetype_test.go | 1 - testdata/doc.1.doc | Bin 45568 -> 0 bytes testdata/doc.doc | Bin 22528 -> 8704 bytes 4 files changed, 18 insertions(+), 9 deletions(-) delete mode 100644 testdata/doc.1.doc diff --git a/internal/magic/ms_office.go b/internal/magic/ms_office.go index ae601f6..5964ce5 100644 --- a/internal/magic/ms_office.go +++ b/internal/magic/ms_office.go @@ -78,14 +78,24 @@ func Aaf(raw []byte, limit uint32) bool { } // Doc matches a Microsoft Word 97-2003 file. -// -// BUG(gabriel-vasile): Doc should look for subheaders like Ppt and Xls does. -// -// Ole is a container for Doc, Ppt, Pub and Xls. -// Right now, when an Ole file is detected, it is considered to be a Doc file -// if the checks for Ppt, Pub and Xls failed.
-func Doc(raw []byte, limit uint32) bool { - return true +// See: https://github.com/decalage2/oletools/blob/412ee36ae45e70f42123e835871bac956d958461/oletools/common/clsid.py +func Doc(raw []byte, _ uint32) bool { + clsids := [][]byte{ + // Microsoft Word 97-2003 Document (Word.Document.8) + {0x06, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46}, + // Microsoft Word 6.0-7.0 Document (Word.Document.6) + {0x00, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46}, + // Microsoft Word Picture (Word.Picture.8) + {0x07, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46}, + } + + for _, clsid := range clsids { + if matchOleClsid(raw, clsid) { + return true + } + } + + return false } // Ppt matches a Microsoft PowerPoint 97-2003 file or a PowerPoint 95 presentation. diff --git a/mimetype_test.go b/mimetype_test.go index 0f6fe12..a493e31 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -49,7 +49,6 @@ var files = map[string]string{ "dcm.dcm": "application/dicom", "deb.deb": "application/vnd.debian.binary-package", "djvu.djvu": "image/vnd.djvu", - "doc.1.doc": "application/msword", "doc.doc": "application/msword", "docx.1.docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx.docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", diff --git a/testdata/doc.1.doc b/testdata/doc.1.doc deleted file mode 100644 index f7f0659b34504e070363255f1836d9447599bafe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 45568 zcmeI536NdYncvUre%)#5Z zzIXbb``&%8pA^Z)j-K@OyUSU=^X=z5=ic{?ug`kvP?O)y>yN(f9d+1%icwMS=?it%AjZHo+3XQo)6ScERrnE)rZU=nz~Y zSSEOf;8MYIL8ss{L6@Lg&?D#-^a=U}D+B|ALBUGFkYJTywP1~4t>ALOuwXo#D&uin4w=;1pK?>*Lb!{MX5m#;f~+nopZ96IKG!qwZk zw(jsQ?+G~W)QP{}Ip}`2@^iOaLzVtY{;bnVg(}>8OO_;;(FAh+(m(&#Z~WudeNB&j zxWT>S;?I8vfAP2gx#m9h9ht|D7y!1p~M!2dlY0O?nMWxfo5 
zdG+|w{p0$^x6#&?!$%K}AF%UUop;Mg4eQ(GpK0dYAGhB4g-34xcFrIFWaJ~CDjXmG z)*D|sIrBMCob*#}{NPvf+_TR;*txLjU;U}Zf9XGd2LD!9bgfw)ahj;{2i~Jwcel#5 zzkagD>07bD7Ws#x?w}iY8jkn(we4=a-F3I;M%vx7_MCj|=(60AeN^7MDevAX7~3?@ z9o%?+{tm%aqVu**Q`{jP9~11*@wo24XJan!j=Nk_P7%a$Ri5tV=ezQ~`L+2~f*!$8 zzRi31MZ(Z}t!vM9x4YR7zm%Kv@ZGN7}=!J^4ASX96uFAPR2U_flONZOLd z;59er;f_i)Xqg;?mI4NK!r+<;7!cVll7C8L@Zw|`Byw0*4onn}yLw@8a|H~DD;G&4 z(il8583uDEr@L*EPN2X61;N&vPv* zHL6;}`tH}!ayO#-S4lUARGYe^%WBa2lWy&@+*;q;5`8;2-!=Ps>s50wzfwI86?B>w z2L)qlCB}o;9BjLD#OU+f1uC`T6l!nuwMTNYG_ob}SbK!=8udORtOrwl&e7N#WULxw zteV`8O;cr*n%up8&|%v+{wwz1#cAzp44k8c+552#&c zJIzr;EpA1F>u)I3yuCtAY*<#!9^dC5<+z{SC$?w0kZw^&xp93NuTFm0^D4Uz)?!`; zo>cGiJ$^h#ne5tV`b^)WAD!>i@ce8AJkicHo>u2SR;a@)F{{ouE7URW#PqO!zF(mZ zv(R)uMupcs6=sy-HJY1MD^~;U%GE+f(zS*>#b?!^*X3%JlMk!Aq+(qpBulnzjNKKn zH7SUx=`G30ks2&XIc$-Ta`LspkM^B4$V0jM+$)wa!*;wPwx#l7wQjFit5jmF)*n@@ zRZeC;T%lHv&xxeDZ+(5dVy#k{v09(4Sj%ii%)3pZea?NWW|QbUH6xlMe!f)E8*zh= z-du~U#B`ZOa9*S^afU2$qs-wVWobs`kH6*RU}i;2OeSi%Yu-I4ms8uyLp}UK_22F; zYx-u*4{F+VT<@01J`c)L6H^kscPf^lr%|_5y}VH~s!|Q|J6-t!S^F;Ud-_$fQJJRM z!WDdHDAsK**rlAI+2U?$$aVTr*VK}in(8j}{&7HU;44<;H>;I8@rByf=OLAU{bY0P zA>U_fzFR)(3iWuEd?lQiB|C31#+iw+*SfV$vuc;tqEQV6uB;M%J)#I>Y*Os0xH<}| z;l@U@_j0M1`tkMDUaI=j6n7kuN7}DAqfHsFJ??IIOxJ7V_wwS~T(`q*Qg5TRJ9Kr1 zTP%wAiKfMIJ-zl>uM>fEyL{z&NA~UAyH`RnQ$m4?cFD!%wrx63+&WL~cPS29Or%)5 zPVF~W6=}f5e%JVPgR5EJ{4d&b%#B7}8_`}Ze4kZCyg?f)irN_VbU9i(tTq%4i7E&5 zk9=A&Z3dJZ&5Abe@{(|0?Okf2PBKb#WD>JjKHVfbNCHzWlkR8SNIRL$D6(i0wVJ$W zre|Gs(X4Bh$Y;T}JMR1aliK6zcbcE!wJC#nL?hVcIeUw)FIEp}404q)2(e8P@!O

DnG2Q4`lelT}HtJ5DUj-E=}}1A#H-Zt~R`7&OVS zWyE_#tvUDQTB6pO6EQ$mWKSM_wcByx8#U<;ojWM2VxQ}jBF{Sou=FYoqmM*iIGROea(mJ z;+i!oXEkbS0|cR<>M?;pyo$E&bB0bP)BP|@ z&&zh6<13UaXE%Pc8hZMC?LWs?uMlrHz8XhMv6}Anbzg46y6?-?NAp<10CS4w^8MH* z{><~0TWWWn*k%s6rMA9Q*@?CD3V>)Qw3VwZb^HJ`hiZrwujFG5eNi6&hrjd+tuRrDKgBkI zHscSIx$F-m>)j3dw?)fB2Q>kR&w5g4y{WUl)EQGWK1?C`)FAVGxUkL7_Q?RW%<9BnW3`1<7FN`b>DY2O+x?aOT6bf2SLHDc&{Ns=lN)0m zGkOC{(X_t0dXx1JZ>s5kvUUcl#HV;1@U*)A$F){zvLH#QvbSqRwNtOP-Scd_lmn^T z4u(Or6NZ!=m6HWz_YReg zMMOTumf2rC_orG4GH{&ChegKHqE=L(Y}VZPw?c(x^0pnsH2anAr!%#h>^43r`nr zgzw@~@i}GY#23mOe^=!Ob&s4)yS`WGJ6Hm$ENj({zsM5Yr~67Mm;F4d`xoi%H}p;a zFY9}TzNf04)A@XEsq*iU*w0lvb5EzCb9FogUK2ka)zRe_*pANMG+DkRJhntgAScMn zulrNuzVqwu@?3xCC~%GfZz~Ec(hd;zFRsnos{4@kA8gTXlzzVhW~;9D=^lHs*^fP} z_4gt9>?v-JUy1KJg<4Zot2f`NP^L3CZ`3ipmbOAU==#_sbg&(( z>^Wt(?grHYzc4QL3bi?QG>vGaj5>{DQhQ9b28BN~9?=n_U>ES9t|z-HLi_AOTCcm5 zV*jq~#qU+UmHA6W?bZIe?T@k@wd|^0tKC)XUEvB`>(DN!>|R^)OY@8Jt@%r|s|Xxu zZ;d$9uOs7CkJ^Jz?1N$F@k&oi_JBNArd4{R)++VBK{bqGS4^}4UPvl?bm3=@r&!l>Qs*W^@Et0L-N;6NurR*E+aS->H~ipokKYy0?f;t2463JjOZpT8 zFP9Dt=x?`#3c$ir=kj=`m3N{em)B9d_dl z>-ubsZa{LAUHkN|o{n@a7B*|86SfBnnj-<=*sEjYg?&lzf2**ihqTn49;Vr~dObhk zYtRI1 zaC_8SbF)5ZoN3eSHnZ4C-L8TRAQQoBB01m>ex^5DrN#CPMB0ziQ@8K0RJv#ZFbjg^ z#xfv(;VL}_dyBQTvG)718>VwK8f=Go?JsFx8a;v;PgbCP<~4$ySuMZbCP``ZmZD95 zxh+38KS#EDt}J$&{C#VFwxAhJ&Nt<)96OFEi#+yuenB5>U77;Hj-|1J>RW~3D(MqG zX{3U_m7@sU#t-+4n#c)qb(wGq)I(>XL7(SG;`J$KC809dqgR&;mk!3+ym%_y=p zb&035Y51t9ZMuqQSuEY>|%O4rNORWGcB=U>Jx;=(LqJ6ZF?Op3_n?-h}S3E(QwmM_pBVxIbiZ&O|fM}<5{9H|_C~Q@$ zGQYbecYTt$KK+9NZTWVO{Sx(^HRqwsEcJ}n*{E-953?jO}GMd6)#R;bbM9)-_X_GSTk!qacF@|Wf3>-+-!Yt1i_C*V5Yc!5@3 zwYug9vTdMQvaPbiMES;D^EDS}ghpHQ0iznd`R)Y6Q_}+IIOe4Vugv_;=1IsBJRI<{ ziRXi2#gc&?d`xlE9Pj&*&2$8^Sw z#%56*ag6W6-h(ku`1J{|4u2JEz|)sdV}t%eA1o&$=~iD?cnu(`vU!rt2Fz>W(THg9 z14OSjze;8kHanunY?)uIo@1Fu#v1+eF{KEdBh9@YE1S!hZ^3feY#hl? 
zw7%bK4LxEhpak(1_r)mv7i2fT?5X%iXd z$gm);yhLMBjKq;#pd@ zJPWi8^GYIuV$84_md_eiEowudU_r9{J(P&GB(Z%1>a{D2E9Ak1do8}f>gh(}Qvnyq1%Euhd>WhdzWVo@41ELSJ6@292 zBp%kX7;q=di^#i#*5G_%KW((JFESaiCdU&d^R6Up40Tg8-(10d8%$7 zAIBCM8NwzEsCD|mJ|%G$Eo8@UrhDnA3Vu`LY{ylxuUu2G_oqe`#%Z|@8*{n1;B5H# zR?!-6VIhz#<3^URuoxn-{6zjp&u@$U5BQpH|GK?^(F%Gce!#AQPw=Jz$1~**T4k3@ zqq8)3=~FSi%Ei9Va+G*Gd(V-r2N~_sFI;gN(7Mut1I|7Mg_f@vk<#8o5=DcEct;wIu9#$i4-?bEoA-z@ee7ROKzTY!Yb^FYfL&>t4H+Bre6OW07}D8LLI8qgYa zp{=w>g-XVq^*Rc7E%yTE_;za2VwNOiw^}M~lmn0k^U^kF38N>nGCGD?YKWptW4KGb zKx^|8>GAX{TCrTQWl&l*CvNt6VrIo0CHaC&g-bkFZuGMj;seXSTg1=WEYT6Y$1|)j zuY*E(N94_Fu*wQKTX9`MxCA8{(gI4%c{J-Y1zhGYe)W zVp$-wTQ;kVBX9B`$(jx0ORuHlX@g(EGFYD3q>+2Y97-q--LLSxOJZ>B4KiwPb%7FC zDN2rbP8Y96C6PPDNT{lQwU%J3M?^Rw8(ev=3LfcI zUeg!zp~exDHYk$jNhz+j>Vu`k$KV$%D`kE;-2&dhEO^?gajn=6gjrFD+_UB1`h%9S z^$r#iTr6q`JdWqJl{Lj||73b{y6XaowYG7W&-GUJP{;&!&i1f{wQiHy;2nk}GmMp0 zBD5#%*|U5cb}eKLjGJVdkT87k)&6C_3uf0rp-M*&SLE760NQ-w2F+I zmXX;GGjyU>vV69>$LtCmL)Np@w;7IRZO0~}2fJz(dr0=S%grFVZ)6 z(vf&Gv z2b1w^`RS01;VEr4OFRd?pdC_%WN#IvSVaq*P5Wcx7V?DjkU409Gc@dh)f-)3<7kk|FcJ&^tu! 
zmKO+K)}m$P98YGF3{@?=NsO6fDvicu7_4?FMG{XHVy!F-la?yuW-N)8vjqo>7A>+h zZ12#?0_Je5B|>HtP9o3#k*O2vFtRmlu*E~1q#?4 z2fIveE3q=^IT_ZMRdN|X~gcB&BEFtBNkVcjyS|y^cLb(qZ!tZRdbuio6dC?#%r-J@dKj@BVxRv z74zJsE8lTWFs)y3lkvr^fCI4_R=~!A{?T^nNZ8vJs2;Gut|XYSvXR(i8Esz@WL!iET5$s z3LgSj(p1L7(RR|)|4TpRV{fwyBa#lN5@#&3C}8^%%UdtYvBe`nhu{<%NM^?DO;+0> zj!JXGB!%n<8XEk5Y{$4tTUNJHf$tlx8Di ze%QF-cVg|~Q6kCst1M}@=mwmFP0x;rLKeqj4fFGfy$Erg?XQ3XBi={i7s5OP>l>mc zv@9J}a3xrC{5$wSJL6|)KaLx*^)aQHTj3py(ur+hu7mwxPY5$Hvk1hr^ck+M^mT3i z6|5;yXqtl9u4Y{gD>js1ADG3%%oWfb%g)(b`E4Jj#rfPz+QW9zlf^EUNdwoI)7&+R z7Rf_@+e3_{=DO|m2Hj^CXBhy-7JLIWnFZLMks-gv zM9iMf9|Rf)e)5Enjnl>o<(XaC94^deu~^AmbhBi|o?kNi7vvAGVbVv7*r2d`$Y^ZV z5LstuIO`FN2>z$#%`6}L6WGyZ{4?s{Z>SG%R;qo@lc>Vlw=7wy52{w1EP^qe%%&3f zU9PB&Y%aVuYB82kS=>H>MK@pyO*W~NJ-uMRs*q?C)DJ5*hJDZ``UhhuPrOq6JWf0t zv!3ngwPdGCvNOnL{cv@?_wb9oZpK+X)8x~!3yhw;3-d$j8@H1DIrtmLjH^lRgFB|@ zV;ZZ;bl6F1Yg@6lnO&{qw~f}w9lK1B7n2#NQ!0yG#e&6sSfpdwLZm%B;}T>KohWV{ zt5B|Wo_2|CN|uc`eeBSVCAf`^jO~5t-4M-p<*9>|gh)E9MkU%=TD#w8Dd9)yoGjs? 
z(YjQF>^k^`7!xYe+T{9-&peZIXMTX^dd%ttTWp+9{=G0xa_TW^bbAJ(z3IkqMi z=J?Ec(Be|wGkTsxvfY32Iv}!bnqf0e@T6DEU#G1dJK=0?nRjYJCH9$Lto+nmwSS@Z zpfA)pd$BLm-t%hLJiwhilwIYc+5m5?8$(ZXd<~En}1>17=y(fj6bX$ zVXe@E;Q3f#g}PkJvQwsg_~P=}Gjs?kGD})MUlyK(h-a)s8Q^Y_1Y8UmjM9A{tlrW( z{@i5QVv3k1VPrPu^jtac(4OF+*EOOGI=EKPb+c|dAUr~LmC_-?BwJXD582C7j*`s| zvu2AZOv8xZEQf^uAP&pQVc58+k$4h#gM}TIg~2Lv&NGSDUWfs0wU@CV@!(*#oZTwc z6TFP`p7wNuqAW>DhkPG-A1oMA92lB3;~AKZlgkaylBV0SHR-fu6F~$S##5Q6K$|RI zg6&4`_=~NMcWYa8RjSEENHMRG@M6mV$E?2nz7f*J6=GiFDDn>_pf6qvN?9h&X3=5p z9`^*F7pPPcNoGl2kN2ar72=#E0=8_iSvSiUCo6@lNQEZ}xtBz0!3UxprS$EP#+GlT zxO~VU+o;LXz;7F~tsrFa0!d4}JJuyw(`3JV@amS+iRp&ChbRy2Dz`trTkqVr3=>+0 ztb`RqdwvA1xA_Tjne3#l{y7qKB4`~WNa$u~r%UxvyOi2CpRtv#?5G2bihU29E*H%e z_hB-;R$L5Ej6yw=B-5?Qb;vAtsSZM~5dDW|T9UZI_IoG0APeQ3hAu(xO|C*LOJMRrzu~FKc;I`vPVC7j(Tw73J>JZA z(b-xKcp*z+t{SKg)*+@anTHO>OX@H(EVT8;bxMb5EYQpF!E?Z^fHzoCGArJqCE8BE z9l`8zc2pH+QPkt9b26>Sg2naBtCfHC9%HsJ3|^U74y`pzEel}3e8}k9{792?`g%-o5^kgekTPzEiG_jnbTv0g%4ZTt8V%3IbLEuh4p?+@-V%V^fs;v59=rBMW* zQEEw{Rjh?J>d?AH+HoEMOoEOyoC4wJ!TeGqElS)d%U zWKm|k+nL^~wLghg2f0r&Zq}#G#m$x=mun@f*kb6BJk?8|wwE>73e#__iFpmw!nRD>MPTBq&)K~7nIxgMTrGouGV<_I;@g-WAl1wTKwjRKDCYHB|TX~A2^>IXR{T|nErt!Y}+jKOnw~_NE@FS97-g$peXPr8~)3@K^pX!iy`nQGk z`na9Aq0c`*xgft-|Khuhmhm>~Jrc8(#k)Xvxz^)b(9b%m9e~+tpGpnqFO$`h#;Q!e zpx57HcCfNgi#y?8xA4bSEY|L|R((IF_h%SQIj+2~&feG1s2IU|WGu!Tu;HCCp_lMI z;IF}#MuGjC<@e6n`~}QtB`MdceRcMxI~!4N)an%fyh!}4k;O$>8N;oG_cFE0!Y47J z#WL9Oc!yiidN^IlH`~l^vbV7XdzO|h_%~kli|Q-1GSI7k%XK{9CAm>=H{l5-lU5|MSEYI-X+5fo?j^F!U6YPt zSw==zcv(gRdQ_WlBswk211d+mCSl#`CrA~tjjWTcL6({C;KMCD!~0hjll9gpTk<{y*yUFxe?-Wg%~A2{>ono?X|crfw8_J$eeA7IY7(Bp)1!Cp-M4bIi!Yj5}C-F~I- z{!3#Fe~d?^xpioYr6^rbB^CvYs`30ITqQyyqO@l`<}FF^Zj=>I=WT?e@Z6@~&A7s? 
z*U=ow&0Kw3)|Y!*+;#djj8J0xM47-vzR!Km3I+6@fBo2)8x-6sI3V~F!2^P41YZ;I zORx9pm%lzE_^RM}!A!lfd{FRR!3;5HQ1G(g6#+kL)+FE!`-6fZ!Ov#vH(3F_BL5!* z*Xxz`e=B%HP%}@z2_UE!3=2jDn+5j@{!nnKe(R%CaM$_H-7V-?sJHYBI`k&>Wr7a~ zJ}7ulZ&LpY!9#+d2woF>qRqL71@B&Bzg9bVq3{v>Tfr9vJM|L){Mzm7`uDQ_z3{@b zFFgBI!3)ni{d<8w?%B^j`^d9S$Awe7m_GH)na_{N>G!I4Y^w7=wj6#Iu8r&4bn(!} z^Yh09#RXgwCHT#_P{qyRN=Z52Lw{)xEun{!8VkAOe?z;jw7C(+KJh$ki`dp{F@aosySNQjp&kDRqxTT}Za!2-Y3?ue# zbh+EVCIt$=(G1Vs{Ly9pZY(0)SWvhwh8N#*UJ?SgXDpi%1gD&YWXjWe#`=E22LwMB z{6fGlId}cO=qwl#9249n_>|z&f(7pue+8|AHo;OsyWk?hKEbVm#|57eJR$g;;5ES; zf}aWA6m;sRr@I8#3a%IYNbqAptA0VcP0%427VwMG8w4*1zAJc9@IApxf~7a9U%{n< zPC=JogJ7fJU4nNDJ}CH*;C{ik1^lA*Pu)-T-~IjHyXQQwzM_vEh-v=FpSiz&(mna) zlb?6`_g9bnmHTt|$mflv{_kwhnVEX{bM8O6r@rvG{yp`VkLzDVyx+0ISz}_p z7=sMl*pR!fAvZI91#OK>)ZPjORIY*RRCBGEha%>xt-yY_h>}6LjKNKbw}>ULDU_P1 z8~9%=7Yf4~WTOT6{X+u$K0g0%1^E3pV^00=_~gO(q+-iI6czZ2B zIykRAH@`i1<=&iXG(6wsYW`VnTl1UkmFma$rarIM6oWjkV8j0;ru{4?e+5UfFZVsK z--plD%Dr$gUJv*fJ&y`P_Sv)~;uIq^8Lc!azsDzEk5B$4K1osj$@tDAXY}Ngacd97Cw~^7d?Y?8dt5~MM8Euz z^fS;pUj^TMok=9co$dQgVUEQ7h`CXi7joz&KKj8 zPsJw>#3%Q~Cr9EF#gb(|vsNW8uDjjEIyn1b=jI69|IcNnr|C06lSGcE!B^&a%IW$3>d-}#+Bi3fU0#|q9S;<)DBueqEH ze2n8cyHg*PtxEFe!mD)x=GevaYv$HrUd>!O%%hnzJ|2%DrPrr(@~3fYABa!hTir>j zfz(M{-_2fGOv@c7{%#^73v_Kvm1@(kTHBpx+<7H;;`%A-Uz0I|dEn&tXH`FgnexET z8<@r9T&^&S(XYVUaQHcR9TjFV#g|WE7SjkXnfw1h5a$0s)bUSax;+%1q#~pD#dj16 zmj9dydc3dxEYqVp`8eZzK3;d0$EApK!iFSXa-DN^@H*ss$n`uV_=MnL!KVb|a`+wK zU&WmLr}!i#DL;zu{4hRwB|dpMKKVg>@=|K^TZmW5k6wE ztDg#0wO;7|x()V~{tNw%7l@d1W9t8)_MB~XH@jQY1*2}v@=iSYxjl;}B|MbnL&}_voXf;Q9K0bAByY}}T z)W31h`zs{{d&QBX{t7z-59><#Plx(P@6q{c`-fI97Mi@l_qE%t)Aufq$sR99(E6vxlJd2c#nE1iy5YE^iJN*Wf;w0yZDtyDBk#Ueyp0#c!omS$*CqcIQ{ zY$tJ{iHQy*CN9TFqC3mRjSIVUi3tl6NgTk13*q@Y_q|tN#||^ogrrR0nf%|mALpEV z@45Hgd)}R;U%GBy{kHcvsiFgtlm}i$QjzimX@#Mzh($v44?NHF3q?rA^cZ>IK7G~s zQ`k(R19Z;YfR<(8Pk?sN0iFb#Kqt`pm>zErydmfL&5J89%PhPgA273xQ@1#Y&;z;` zq%yiIh%B0?9P-)9XOirqE-dIctrz69jKSZQ1(}g4(+|J4{k5jAJ`6dfPU}-*a_mjB|58*Ea^PFIa351SKyhEw(&^%3z<&G0c9mAl?m;sA5x 
zP?qH!8RJxb2h9a^HAa~vT>xjyb571ub_n|#xhU@W8{EiwT#R@RMD(=0PXJAI}hvLP$$q=3m)bMAWiUNym1qqvTG*khJVSEh&BI%7|K5`pRc=(8y2ImgYD`qL+L)@Ui?j7SP;yGnb-6B= zFX!_0WSg&Fei=DCKIzhVO3Od@YuZVVOln=G;&n=p6@9-# zdXL#^R}}p(Gl7hLfiy~LQ*Sgz0v_spK|bHa8^3_OPV1Gx&ZqEjdk$?j!+%bRTw`_5 za1!l8@dWntt17101F;8U53F|&IFf5(_gwFzY4!dcH6IRhgzi(~au2JwurA+R1@rOX zfBk5%t+mHZ&^=GOK<=XKmqFPBhQP4-XQ0)+)q>*-;yg#Pua8?Y366k+<9hiOQ>%L* z5#C?u|9cVQdmwi@l8=+E>4C&<$>gn$BDYa_N#2#&{w-xL_rSLDM}uzpr_sLhyuH0# Ya&{C?+5N?mGgS0%R$9{^8*Wtp0w|dgH~;_u literal 22528 zcmeHP2|U!>+ds2dvzBBlgCX6-AWLLVNkS?uvJA$~bjPFxQM70g?WtQNB~nzXD^lqu zl~PG7kyLKfZPkK#pEKiklbd_H_kZ8_^M60hGxFRx_K}c@K0bdMI@~6b($^M?+Uzo^noF9-B z6T~t187v6@8CM{R1IL4aqnw^{U*l4cz8KOmC(@%8Aq>-cRZqEp%3ncwGU$ENqvu;V z9rCvfLFgD1+^zy`gSZm3r$+_EJ`-UFqJ5uJ1R+A+AIExo6QPgbL^&c45g!HRaM6;U zM)l=5AD%*e_4JKn`}8J4PjA_`go$?jmA-$fzrQy0)kn-XxuPY*TTN>5!pJ8=p3!5M zzU3M~j_0id7?lAW&p%8=K4n!wJg=ub5hv0GssH(t&`W3{#tYYj=Otlhf)n)Bw&P2RFPUt1Zu`mCt^uGE3tvE4W{k6M4Inmy~ z%lp%Z*Jc%Im?JQWEnwD*pn>p|pyF>2A^o>oAdnpZV<)8V)&lS;rS$CeKEC^It?*rR zTvRcnaLBK)HVZggmjoNsp0V%xE-WDIWG@Tg9V*Eb8K#+&jNHk{Vy@1tRHR;5V?Zlx zw9+UP7t;i2rDclH0osKK4Jn*TQ7UYtNTu!neRtbSpD_6U&;lx=vS=XO;>;(>A~Cr0 z!GCb_$IAlR3fTD9B^f{iXeH$aA5;gkF3QBf|ah6W@@hZ`ly| z^f&?f%D>0>+@A3@f_bC_wmc8huMY4S@D3mfmr9BN9e^odJirqW1c(7F2W$rPR}Smd zg*tA7y`KX9sh;n>oH^KTCg43l9PFtC@B}OdqyaJkg@Dt58bBhT5%3!TFDY^W8h`nHVm~;xFAqVHdXp$3|uZ!udgubQ^>-rj%;&7|v8D3M}a9(>9ieB@M04H)WM5!n$NF^;~<=7B})X)&TOpLA*MI=c|gdiXS z5+)Xt`R$~kaM~N*8E7Tysfi&UsBd#-;pCtos&(Mjw|l&PLK z<-XFhlyvjvDu-Jwlw*ZMXIedP`LLD#FzslGjj8JaX9w}5Bd6Ebw&i{pU!`cTfAQzx zZ}s<#Ov$RAb#~eZ<-(^vER$;{xec^e1J(?zD3nU^y&6nY9`(4~a%W20y}DTurS++Y zQ{zm1F1w!LDfqp$c{=FEVPE=>zPHr3?o@6xb-D3Y(c_kx-@$dcRlFk=jLSRS z&LkYtYjLDTnQe3ledM2&YtTHkEG?InS39hx_{4|0F6hc1I~u-lvtkl-a1}rezztzV zvJ84XCo`j%!7L7*AyStoOIFb2Y@6b<=BM2;3plCk=zsXq!ISTh!wwa@%FOG#?%ir> zPBOdlK;h>0Nz$|Qc8E!@VpZ#}JpNmp-aggbyH0Aq=tVSD4u0IaK&k5J+CdK{UWlF& zuXQtRSwV3m?MnDO*R$B2iH&qnG-K1BaC3^)@G2mle&A45+exb$ev-==Sz)iuDEF ztNp~UOcke+Cth$MH~Bo*NFFe!(dBtl_mKv~HgL^7%tbblda8A&VAI 
z?|PQ^Yo13(b;qvrwU!lgL?!71T{5C2z=U#)Y7!GO zd%Ad$V*JH2<8hYOE@lBw26ayRILy)h?CMnCYc`8N)Kwc?VMernXL+*8Jwbdrr}+*3YE+^*$y3`^nljIS1B1ty3z2S7-A-fT$ijvr#B}ti~t+z5) zu7AW8^OjJ3+Z|LqCh*GBtU(PjNmGt&9%0}vC%urnf5c8oE1>d9mx=6NB#8^_30f zypLUM-^H6;%hnFjX_%cc)#|{B!gKkD5*Bs6b~|+HL+35^Q{0dj9IpS_;q>!oPp-IH zwnA^Yh=tDS;hmg;8^q z5!d2ur)XXy51+Z{*P)ib+%77Sl_+Xkykq9IY2riID|H@@ zk*6kj9UHT_?XA+hf<=i}Bj|0?Pi`s_N}-(owohtx;pScXOCLSvelql~G+jT5^Xv76$x6oQl$g8g+)KI4N_vV* zo7h7IdxeFvTe)Y}Z7XxAHGWUhj;-(7>UXK0eMNK~ts-ONDl~=ddB5q>y{)reTKe1= zbN1l08!bhf{7>#M&(1zRduCRAvZ3RXkmW@kPbp$a!@Opj8aO>jq2;Uj&+x0ai#d?@ zggT(DwIRsNZJ0cBS(}o{N9&tfYJ*md zgng;`T8?j`jMj;cuy{F*{KWmiO{up`j(5&aw-WvID#fvx{Apm|Z#gVl(RQQq@a+=2 z>!^;WUF2S-&S_N~r`It=Gw(!L(Wa?UQS7^CrEAn?S-H8aH#4#IDsX#W9v+{0X2IL` z`Kd~AmhGutMVs>V?>ue0q`ktcz1sSI%+wpfapN1hb_OMSR#Bu_fo4qasz(dp8H9aT01mDg$=O|HnwJG4=6+}=eR zrP_n$2JKn#Q_0|+2OkEQPFiP?KYiYTZJwS>0u6m6UjJymcm0TxYkH|KIBzrB?bVM& ziR<3El*Z6GGO>0jJ!88aiyoglV0?61&B)l1dUs_MN98PT&Ks&|JE}u6?!mi8uSJ!z z6giTHd1_Ka+}_TOC5xK9+bMRO{OaBj2d|`cFR7`Ez9qUjEziZoUPW$ z8kMa&R{qTKMm>w0PaQ+%)ZJ=7o1{(!EzaY4(Sfijc{L$&wV;f$^rS7}VJ!!aG?wdyNngGwIC$|ujfec%Vmm6eJ4Nnv5t%1iPK$EFW{d3^fT!$-p#99xx~ zE9Je%>*nhQ8M!rCY44v817Eya}=ys7)OYSPs#@$7b16JI+Cr(+qeblJ_S|Jz zANC%q-CQ^0$>U;E%Z22s0Ob9Am2;N2#FjcWJBQJARueN!{Z^0Nbf+%Stzp%MRNfTr zHqoxxw+*JxZYor$O>*8pIObVSlu})OMd57aH(^&Z3oqy%T;5vTCH~ytsm00CTg{e3 z-oJdj>hbxFzp(ZiJiU~0;?(Iwb#sQiA2iS8MJ#pwl?iL_=6bU`RB~dd5_GObL&oUH} zJC@EDXR~jV$ffJludFGkr6fNxkv6rVPibqHKBy4K;(no*`OC%LM9Q1pGk8b3%vDl?4b&CAyQkPi-&rQ2 zyZij*My>66F4T)&B_-z`K*t1KCy__N3-kUq>X5A$eP!G3KIuyZeRr;(OEbWpz4ozv z3-;X!elDmCTjW1%t$LZiucp4+vd^08zNU%4N$abl?HEKYPNJIYGaP@kbmV?_n914E;Ntu&`k8$ZfilNQZoMg=k> z*ikGi&4sLJO&e=z$+1j}fC$cH=3E*mh>EsgT4{!GxpOTH459;I+susCXU}CtL0S-- z6UpR4loM)g%HMSmHPQeW;I8p)e-~Gb@6Q8d|u%IW*H#$V&$O;o1mw`<>P=9d|{#~s= zH$Jbn@N)~i@$fA^e$CTl|BNri^%MGGNDfBaxAa2#HJ*6D5lXtS0b3>d}M} zabPN12xa`CWCX;F_4&0A5T%kaQF%Y0Mu)G|ZihHfKa*d^MkbB4T!KszMHCXIax%*N z7Z;Dh*1#*8;MV!rG%siPYWOlvO%hFU7`)npn6CjSs+Vf4Ki;Ok9MP6 zREUnFN^%u>FinxBOdCqmp)qJiG-H|tjiOgN4m%h 
z#iDGK11(pgsnNt4k_;(^3`35gz))liWz6dK>h|uY^2DJnmOL+B6R+iOw?JGhQ{*Vx zftDt6`m`Kdn)nFzN&4m}d}Z_y$(9Ip;`@Mp$jKci40wsXGwBdkf;|K*R0;io{|*_( z-{`CQG4wQGDfP$6fhCR0cD?%G79y9QE{;YH zKXQiwvNez(4;-h%*8(i?g7<`Y((VD^(Jutx;jRJT#U2kRUYVZ)@E1iV059}vpac)G zE^yeR@ZX8yyVz+E#}~DJ!132+1aRy*%mpq7JPtU%lT8MW>&Fvz7;s|d^$p|qN|Qqa zIP7S45SQi_6cif3qD_xx;SXQmyuEecwz%S*NrZkNn;Xf``z^W*d-Q%a?+i945YlY{ zXnchhX14gD7~Js`yhi!qIX|dk4^nJX=;H4S0*(`appb+X5L!TJ0igwi77$uMXaS)G zgccB5KxhG>1%wv(W()k;`Ja5V`sNOORk;mY;QX)m;UELF9suC8IzE@q1mJVI9{`{C z@zp9m@8e5Ld`A!mfPZF1$pC!0vj%|A?;8R5{JsN#uTS>?zIKl9ef}lR!Jp5+a(tao zv|oG63VpWpHOaQt2#zmv!3etee4=Y0GQAD`#p2VnTWRE5job3cCntqg!4 zvH~X|!N1E41&-6zfztpQ08M}vfDX_G;PftlZciA;F@*H*TL3%LvH5N^j^MV&1;?3% zVD)b&MI=HZT7~D3s05x(2%Y9X(Rcw9Jl!WR<^M9)4-if4NiYN#KPEV>MS#=JKOq@# zsl)Z-d5ihC=)gWG#IGmKfZlUZAh_+Z!Leu_-`Nn`;af=s>c@9exNc0}T8{tWfy^S{ znuu$~4vpCM6JZpB_-(=xF1R(Z`HqhL>D7h$vBmLc_gm}dyDZ`lFObL|dj9v?W8Pzb ze&@fj|Jdh-=syYE7O^|$TyW#^M`1YSZ_z-wIV9@-*8bxPuw&-GMgOG!f2(e+=Rc_* z&t2i#+S`KPSC|B&Pk}FZ-0Qw;w;Ifi&wg{*ag8>{Y!rCq8`G|wD=y~m3nov+@^S0{ e{s$U|^&v7Z61?#B&C#1NhkwK~R7n4=7Wf~tevZ`u From eb7d27d35c955711242e17d5729668b22d579382 Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Wed, 2 Feb 2022 19:51:50 +0200 Subject: [PATCH 15/16] Add FAQ about Microsoft office documents mis-detected --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index e63f0b6..11b38c1 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,22 @@ using magic numbers is slow, inaccurate, and non-standard. Most of the times protocols have methods for specifying such metadata; e.g., `Content-Type` header in HTTP and SMTP. +## FAQ +Q: My file is in the list of [supported MIME types](supported_mimes.md) but +it is not correctly detected. What should I do? + +A: Some file formats (often Microsoft Office documents) keep their signatures +towards the end of the file. 
Try increasing the number of bytes used for detection +with: +```go +mimetype.SetLimit(1024*1024) // Set limit to 1MB. +// or +mimetype.SetLimit(0) // No limit, whole file content used. +mimetype.DetectFile("file.doc") +``` +If increasing the limit does not help, please +[open an issue](https://github.com/gabriel-vasile/mimetype/issues/new?assignees=&labels=&template=mismatched-mime-type-detected.md&title=). + ## Structure **mimetype** uses a hierarchical structure to keep the MIME type detection logic. This reduces the number of calls needed for detecting the file type. The reason From c7909c100095adef737f21c436654089a5f1ef7d Mon Sep 17 00:00:00 2001 From: Gabriel Vasile Date: Thu, 3 Feb 2022 17:57:18 +0200 Subject: [PATCH 16/16] Add missing comments for exported fields --- internal/charset/charset.go | 8 ++++++++ internal/magic/{signature.go => magic.go} | 0 2 files changed, 8 insertions(+) rename internal/magic/{signature.go => magic.go} (100%) diff --git a/internal/charset/charset.go b/internal/charset/charset.go index d1ccdd1..b0e1388 100644 --- a/internal/charset/charset.go +++ b/internal/charset/charset.go @@ -52,6 +52,7 @@ var ( } ) +// FromBOM returns the charset declared in the BOM of content. func FromBOM(content []byte) string { for _, b := range boms { if bytes.HasPrefix(content, b.bom) { @@ -61,6 +62,8 @@ func FromBOM(content []byte) string { return "" } +// FromPlain returns the charset of a plain text. It relies on BOM presence +// and it falls back on checking each byte in content. func FromPlain(content []byte) string { if len(content) == 0 { return "" @@ -129,6 +132,9 @@ func ascii(content []byte) bool { return true } +// FromXML returns the charset of an XML document. It relies on the XML +// header and falls back on the plain +// text content. 
func FromXML(content []byte) string { if cset := fromXML(content); cset != "" { return cset @@ -151,6 +157,8 @@ func fromXML(content []byte) string { return strings.ToLower(xmlEncoding(string(t.Inst))) } +// FromHTML returns the charset of an HTML document. It relies on the meta tag +// and falls back on the plain text content. func FromHTML(content []byte) string { if cset := fromHTML(content); cset != "" { return cset diff --git a/internal/magic/signature.go b/internal/magic/magic.go similarity index 100% rename from internal/magic/signature.go rename to internal/magic/magic.go