This repository has been archived by the owner on Apr 17, 2023. It is now read-only.
/
html.go
114 lines (100 loc) · 3.67 KB
/
html.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// Copyright 2020 Daniel Erat <dan@erat.org>.
// All rights reserved.
package validate
import (
"bytes"
"context"
"fmt"
"io"
"io/ioutil"
"regexp"
"strconv"
"strings"
"golang.org/x/net/html"
)
// htmlSuccess is the text included in https://validator.w3.org/nu/ results pages on success.
// HTML searches the raw response body for it and passes the result to checkResponse.
const htmlSuccess = "The document validates according to the specified schema(s)."
// HTML reads an HTML document from r and validates it using https://validator.w3.org/nu/.
//
// It returns the issues parsed from the validation service's results page
// together with the raw bytes of that page. If the returned error is non-nil,
// a problem occurred in the validation process itself: the request failed,
// the response body could not be read or parsed, or checkResponse reported an
// error for the combination of the success text and the parsed issues. The raw
// page is still returned alongside the error whenever it was read successfully
// so that callers can inspect it.
func HTML(ctx context.Context, r io.Reader) ([]Issue, []byte, error) {
	resp, err := post(ctx, "https://validator.w3.org/nu/",
		map[string]string{"action": "check"},
		[]fileInfo{{field: "uploaded_file", name: "data", ctype: string(HTMLDoc), r: r}})
	if err != nil {
		return nil, nil, err
	}
	defer resp.Body.Close()

	out, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, nil, err
	}

	node, err := html.Parse(bytes.NewReader(out))
	if err != nil {
		// Wrap with %w so callers can still inspect the underlying parse error.
		return nil, out, fmt.Errorf("failed to parse response: %w", err)
	}

	issues := extractHTMLIssues(node)

	// checkResponse is given whether the success text was present and the
	// issues that were parsed. Its error must be propagated: it was previously
	// assigned and then discarded by returning a literal nil.
	err = checkResponse(strings.Contains(string(out), htmlSuccess), issues)
	return issues, out, err
}
// extractHTMLIssues collects validation issues from the tree rooted at n,
// which is all or part of a document returned by https://validator.w3.org/nu/.
// Errors appear in that document as <li class="error"> elements; each one is
// converted with makeHTMLIssue, in document order. A matching element is not
// searched any further, so anything nested inside it is ignored.
func extractHTMLIssues(n *html.Node) []Issue {
	// TODO: Does the validator return warnings?
	var found []Issue

	var visit func(cur *html.Node)
	visit = func(cur *html.Node) {
		isErrorItem := cur.Type == html.ElementNode &&
			cur.Data == "li" &&
			getAttr(cur, "class") == "error"
		if isErrorItem {
			found = append(found, makeHTMLIssue(cur, Error))
			return
		}
		for child := cur.FirstChild; child != nil; child = child.NextSibling {
			visit(child)
		}
	}

	visit(n)
	return found
}
// makeHTMLIssue builds an Issue with severity sev from the supplied
// <li class="error"> node by inspecting its direct <p> children:
//
//   - <p class="location"> supplies Line and Col, taken from the
//     <span class="last-line"> and <span class="last-col"> elements.
//   - <p class="extract"> supplies Context.
//   - a <p> for which getAttr reports an empty class supplies Message,
//     taken from its <span> content.
//
// Here's an example error, with line breaks and whitespace added for legibility:
//
//	<li class="error">
//	  <p> <strong>Error</strong>: <span>Saw <code>&lt;&gt;</code>. Probable causes:
//	    Unescaped <code>&lt;</code> (escape as <code>&amp;lt;</code>) or mistyped
//	    start tag.</span>
//	  </p>
//	  <p class="location">
//	    <a href="#cl6c14">At line <span class="last-line">6</span>, column
//	      <span class="last-col">14</span></a>
//	  </p>
//	  <p class="extract"> <code>&gt;<span class="lf" title="Line break">↩</span>ueaueohtn
//	    u&gt;&lt;<b>&gt;</b>&lt;&gt; Y<span class="lf" title="Line
//	    break">↩</span>&lt;body&gt;<span class="lf" title="Line
//	    break">↩</span>&lt;p</code>
//	  </p>
//	</li>
func makeHTMLIssue(li *html.Node, sev Severity) Issue {
	// anySpan matches every <span> element.
	anySpan := func(n *html.Node) bool {
		return n.Type == html.ElementNode && n.Data == "span"
	}
	// spanWithClass returns a predicate matching <span> elements with the given class.
	spanWithClass := func(class string) func(*html.Node) bool {
		return func(n *html.Node) bool {
			return n.Type == html.ElementNode && n.Data == "span" && getAttr(n, "class") == class
		}
	}
	// number parses s as an int after trimming whitespace. Conversion errors
	// are deliberately ignored so that a malformed location does not prevent
	// the rest of the issue from being reported.
	number := func(s string) int {
		v, _ := strconv.Atoi(strings.TrimSpace(s))
		return v
	}

	issue := Issue{Severity: sev}
	for para := li.FirstChild; para != nil; para = para.NextSibling {
		if para.Type != html.ElementNode || para.Data != "p" {
			continue
		}

		class := getAttr(para, "class")
		if class == "location" {
			issue.Line = number(getText(para, spanWithClass("last-line")))
			issue.Col = number(getText(para, spanWithClass("last-col")))
		} else if class == "extract" {
			issue.Context = strings.TrimSpace(getText(para, nil))
		} else if class == "" {
			raw := getText(para, anySpan)
			// Collapse each line break and the whitespace around it into a single newline.
			issue.Message = spacesAroundLines.ReplaceAllString(strings.TrimSpace(raw), "\n")
		}
	}
	return issue
}
// spacesAroundLines matches a newline together with any whitespace immediately
// before and after it. makeHTMLIssue replaces each match with a single "\n" to
// tidy up multi-line messages.
var spacesAroundLines = regexp.MustCompile(`\s*\n\s*`)