/
readme.go
66 lines (57 loc) · 1.92 KB
/
readme.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/*
Copyright 2024 Chainguard, Inc.
SPDX-License-Identifier: Apache-2.0
*/
package validation
import (
"fmt"
"html"
"regexp"
"strings"
"github.com/google/go-cmp/cmp"
"github.com/microcosm-cc/bluemonday"
"github.com/russross/blackfriday/v2"
)
// ErrUnsafeReadme is the sentinel error returned by ValidateReadme when the
// HTML rendered from a README differs from its sanitized form.
var ErrUnsafeReadme = fmt.Errorf("readme contained unsafe html content")
// ValidateReadme checks that a Markdown README.md renders to HTML that the
// sanitizer leaves untouched.
//
// An empty readme is always accepted. Otherwise the Markdown is rendered to
// HTML, the HTML is sanitized, and the two results are compared. When they
// differ, the returned string holds the diff between the rendered and the
// sanitized HTML and the error is ErrUnsafeReadme. When they match, the
// returned string is empty and the error is nil.
func ValidateReadme(readme string) (string, error) {
	// Nothing to render or sanitize, so skip the pipeline entirely.
	if len(readme) == 0 {
		return "", nil
	}

	rendered := readmeToHTML(readme)
	sanitized := sanitizeHTML(rendered)

	// Unescape both sides first so that encoded HTML entities compare equal
	// to their literal form and only real sanitizer changes show up.
	before := unescapeHTML(rendered)
	after := unescapeHTML(sanitized)

	diff := cmp.Diff(before, after)
	if diff == "" {
		return "", nil
	}
	return diff, ErrUnsafeReadme
}
// readmeToHTML renders rawMarkdown to HTML with blackfriday and returns the
// result as a string.
func readmeToHTML(rawMarkdown string) string {
	rendered := blackfriday.Run([]byte(rawMarkdown))

	// Single tags come out of the conversion with an extra space before the
	// closing slash (e.g. "<hr />"); drop that space.
	return strings.ReplaceAll(string(rendered), " />", "/>")
}
// bluemondayPolicy is the sanitization policy applied to rendered README
// HTML. It is built once at package initialization, starting from
// bluemonday's UGC policy and relaxing it for content that Markdown
// conversion legitimately produces.
var bluemondayPolicy = func() *bluemonday.Policy {
	// Class values accepted on <code>: "language-" followed by alphanumerics,
	// as emitted for fenced code blocks.
	fencedCodeClass := regexp.MustCompile("^language-[a-zA-Z0-9]+$")

	policy := bluemonday.UGCPolicy()

	// Keep the language class on fenced code blocks.
	policy = policy.AllowAttrs("class").Matching(fencedCodeClass).OnElements("code")

	// Do not require rel="nofollow" on links; the Markdown conversion does
	// not set it automatically.
	policy = policy.RequireNoFollowOnLinks(false)

	// Permit custom width and height on images.
	policy = policy.AllowAttrs("width", "height").OnElements("img")

	// Permit HTML comments.
	policy.AllowComments()

	return policy
}()
// sanitizeHTML runs unsafeHTML through the package-level bluemondayPolicy and
// returns the sanitized result.
func sanitizeHTML(unsafeHTML string) string {
	sanitized := bluemondayPolicy.Sanitize(unsafeHTML)
	return sanitized
}
// unescapeHTML returns safeHTML with HTML entities (such as "&lt;" or
// "&amp;") replaced by the characters they encode.
func unescapeHTML(safeHTML string) string {
	decoded := html.UnescapeString(safeHTML)
	return decoded
}