Skip to content

Commit

Permalink
Merge branch 'master' of github.com:gabriel-vasile/mimetype into 146_…
Browse files Browse the repository at this point in the history
…installishield
  • Loading branch information
gabriel-vasile authored and ysyrota committed Apr 30, 2022
2 parents ae439f5 + 6e3aeb1 commit 8c6b77e
Show file tree
Hide file tree
Showing 12 changed files with 150 additions and 16 deletions.
10 changes: 9 additions & 1 deletion .github/workflows/codeql.yml
Expand Up @@ -6,13 +6,21 @@ on:
pull_request:
branches: [master]

permissions:
contents: read

env:
CGO_ENABLED: 0

jobs:
CodeQL-Build:
runs-on: ubuntu-latest
permissions:
security-events: write

steps:
- name: Check out code
uses: actions/checkout@v2.4.0
uses: actions/checkout@v3.0.1

- name: Initialize CodeQL
uses: github/codeql-action/init@v2.1.6
Expand Down
21 changes: 14 additions & 7 deletions .github/workflows/go.yml
@@ -1,30 +1,37 @@
name: Run Tests
on:
push:
branches:
- master
pull_request:

name: run tests
permissions:
contents: read

jobs:
lint:
strategy:
matrix:
go-version: ["1.18.x"]
runs-on: ubuntu-latest
steps:
- name: Install Go
uses: actions/setup-go@v2.2.0
with:
go-version: 1.16
- name: Checkout code
uses: actions/checkout@v2.4.0
uses: actions/checkout@v3.0.1
- name: Run linters
uses: golangci/golangci-lint-action@v2.5.2
uses: golangci/golangci-lint-action@v3.1.0
with:
version: "v1.37.1"
go-version: ${{ matrix.go-version }}

test:
strategy:
matrix:
go-version: ["1.12.0", "^1.17.6"]
platform: [ubuntu-latest, macos-latest, windows-latest]
go-version: ["1.12.0", "1.18.x"]
platform: [ubuntu-latest, windows-latest]
runs-on: ${{ matrix.platform }}
steps:
- name: Install Go
Expand All @@ -33,7 +40,7 @@ jobs:
with:
go-version: ${{ matrix.go-version }}
- name: Checkout code
uses: actions/checkout@v2.4.0
uses: actions/checkout@v3.0.1
- name: Run tests
run: go test -race ./...

Expand All @@ -46,7 +53,7 @@ jobs:
with:
go-version: 1.16
- name: Checkout code
uses: actions/checkout@v2.4.0
uses: actions/checkout@v3.0.1
- name: Calc coverage
run: |
go test -v -covermode=count -coverprofile=coverage.out
Expand Down
9 changes: 7 additions & 2 deletions internal/charset/charset.go
Expand Up @@ -157,9 +157,14 @@ func fromXML(content []byte) string {
return strings.ToLower(xmlEncoding(string(t.Inst)))
}

// FromHTML returns the charset of an HTML document. It relies on the meta tag
// <meta charset="UTF-8"> and falls back on the plain text content.
// FromHTML returns the charset of an HTML document. It first checks whether a
// BOM is present and, if so, uses it to determine the charset. If no BOM is
// present, it relies on the meta tag <meta charset="UTF-8"> and falls back on
// the plain text content.
func FromHTML(content []byte) string {
if cset := FromBOM(content); cset != "" {
return cset
}
if cset := fromHTML(content); cset != "" {
return cset
}
Expand Down
30 changes: 30 additions & 0 deletions internal/charset/charset_test.go
Expand Up @@ -24,6 +24,29 @@ const htmlDoc = `<!DOCTYPE html>
<div class="container footer">さ</div>
</body>
</html>`
// htmlDocWithIncorrectCharset is an HTML fixture whose meta tag declares the
// charset as ISO-8859-16. TestFromHTMLWithBOM prepends a UTF-8 BOM to it and
// expects FromHTML to report "utf-8" rather than the declared charset.
const htmlDocWithIncorrectCharset = `<!DOCTYPE html>
<!--
Some comment
-->
<html dir="ltr" mozdisallowselectionprint>
<head>
<meta charset="ISO-8859-16">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta name="some name" content="notranslate">
<title>test</title>
<link rel="stylesheet" href="html.utf8bom.css">
</head>
<body tabindex="1">
<div id="printContainer"></div>
</body>
</html>`

func TestFromXML(t *testing.T) {
charset := FromXML([]byte(xmlDoc))
Expand All @@ -39,6 +62,13 @@ func TestFromHTML(t *testing.T) {
}
}

func TestFromHTMLWithBOM(t *testing.T) {
charset := FromHTML(append([]byte{0xEF, 0xBB, 0xBF}, []byte(htmlDocWithIncorrectCharset)...))
if charset != "utf-8" {
t.Errorf("expected: utf-8; got: %s", charset)
}
}

func TestFromPlain(t *testing.T) {
tcases := []struct {
raw []byte
Expand Down
10 changes: 8 additions & 2 deletions internal/magic/archive.go
Expand Up @@ -41,10 +41,16 @@ var (
Cpio = prefix([]byte("070707"), []byte("070701"), []byte("070702"))
// RAR matches a RAR archive file.
RAR = prefix([]byte("Rar!\x1A\x07\x00"), []byte("Rar!\x1A\x07\x01\x00"))
// InstallShieldCab matches an InstallShield Cabinet archive file.
InstallShieldCab = prefix([]byte("ISc("))
)

// InstallShieldCab matches an InstallShield Cabinet archive file.
//
// The input must be at least 8 bytes long, start with the "ISc(" signature,
// have a zero byte at offset 6, and have 1, 2 or 4 at offset 7. The second
// argument (the read limit passed to every detector) is ignored.
func InstallShieldCab(raw []byte, _ uint32) bool {
	// Offsets 6 and 7 are inspected below, so anything shorter cannot match.
	if len(raw) < 8 {
		return false
	}
	if !bytes.HasPrefix(raw, []byte("ISc(")) {
		return false
	}
	if raw[6] != 0 {
		return false
	}
	switch raw[7] {
	case 1, 2, 4:
		return true
	default:
		return false
	}
}

// Zstd matches a Zstandard archive file.
func Zstd(raw []byte, limit uint32) bool {
return len(raw) >= 4 &&
Expand Down
9 changes: 8 additions & 1 deletion internal/magic/magic.go
Expand Up @@ -104,7 +104,14 @@ func xmlCheck(sig xmlSig, raw []byte) bool {
// matches the raw input.
func markup(sigs ...[]byte) Detector {
return func(raw []byte, limit uint32) bool {
raw = trimLWS(raw)
if bytes.HasPrefix(raw, []byte{0xEF, 0xBB, 0xBF}) {
// We skip the UTF-8 BOM if present to ensure we correctly
// process any leading whitespace. The presence of the BOM
// is taken into account during charset detection in charset.go.
raw = trimLWS(raw[3:])
} else {
raw = trimLWS(raw)
}
if len(raw) == 0 {
return false
}
Expand Down
2 changes: 1 addition & 1 deletion mimetype_test.go
Expand Up @@ -41,7 +41,7 @@ var files = map[string]string{
"bpg.bpg": "image/bpg",
"bz2.bz2": "application/x-bzip2",
"cab.cab": "application/vnd.ms-cab-compressed",
"cab.is.cab": "application/x-installshield-cab",
"cab.is.cab": "application/x-installshield",
"class.class": "application/x-java-applet",
"crx.crx": "application/x-chrome-extension",
"csv.csv": "text/csv",
Expand Down
2 changes: 1 addition & 1 deletion supported_mimes.md
Expand Up @@ -138,7 +138,7 @@ Extension | MIME type | Aliases
**.gbr** | image/x-gimp-gbr | -
**.glb** | model/gltf-binary | -
**.avif** | image/avif | -
**.cab** | application/x-installshield-cab | -
**.cab** | application/x-installshield | -
**.txt** | text/plain | -
**.html** | text/html | -
**.svg** | image/svg+xml | -
Expand Down
23 changes: 23 additions & 0 deletions testdata/html.utf8bom.html
@@ -0,0 +1,23 @@
<!DOCTYPE html>
<!--
Some comment
-->
<html dir="ltr" mozdisallowselectionprint>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta name="some name" content="notranslate">
<title>test</title>


<link rel="stylesheet" href="html.utf8bom.css">



</head>

<body tabindex="1">
<div id="printContainer"></div>
</body>
</html>
24 changes: 24 additions & 0 deletions testdata/html.utf8bomdetect.html
@@ -0,0 +1,24 @@

<!DOCTYPE html>
<!--
Some comment
-->
<html dir="ltr" mozdisallowselectionprint>
<head>
<meta charset="ISO-8859-16">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta name="some name" content="notranslate">
<title>test</title>


<link rel="stylesheet" href="html.utf8bom.css">



</head>

<body tabindex="1">
<div id="printContainer"></div>
</body>
</html>
24 changes: 24 additions & 0 deletions testdata/html.utf8bomws.html
@@ -0,0 +1,24 @@

<!DOCTYPE html>
<!--
Some comment
-->
<html dir="ltr" mozdisallowselectionprint>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta name="some name" content="notranslate">
<title>test</title>


<link rel="stylesheet" href="html.utf8bom.css">



</head>

<body tabindex="1">
<div id="printContainer"></div>
</body>
</html>
2 changes: 1 addition & 1 deletion tree.go
Expand Up @@ -243,7 +243,7 @@ var (
accdb = newMIME("application/x-msaccess", ".accdb", magic.MsAccessAce)
zstd = newMIME("application/zstd", ".zst", magic.Zstd)
cab = newMIME("application/vnd.ms-cab-compressed", ".cab", magic.Cab)
cabIS = newMIME("application/x-installshield-cab", ".cab", magic.InstallShieldCab)
cabIS = newMIME("application/x-installshield", ".cab", magic.InstallShieldCab)
lzip = newMIME("application/lzip", ".lz", magic.Lzip).alias("application/x-lzip")
torrent = newMIME("application/x-bittorrent", ".torrent", magic.Torrent)
cpio = newMIME("application/x-cpio", ".cpio", magic.Cpio)
Expand Down

0 comments on commit 8c6b77e

Please sign in to comment.