-
Notifications
You must be signed in to change notification settings - Fork 21
/
sanitize.go
258 lines (245 loc) · 10.3 KB
/
sanitize.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package template
import (
"fmt"
"regexp"
"strings"
)
// sanitizerForContext returns the names of the functions that must be applied, in
// order, to sanitize a data value interpolated into the HTML context c.
//
// It returns an error when c.state is stateTag, stateAttrName or stateAfterName,
// or when c is an attribute value context that is not delimited by single or
// double quotes.
func sanitizerForContext(c context) ([]string, error) {
	if c.state == stateTag || c.state == stateAttrName || c.state == stateAfterName {
		return nil, fmt.Errorf("actions must not affect element or attribute names")
	}
	if c.state == stateHTMLCmt {
		return []string{sanitizeHTMLCommentFuncName}, nil
	}

	// Plain text that is not nested inside any HTML element.
	outsideElement := c.element.name == "" && len(c.element.names) == 0
	if outsideElement && c.state == stateText {
		return []string{sanitizeHTMLFuncName}, nil
	}

	inAttrValue := c.attr.name != "" || len(c.attr.names) > 0
	if !inAttrValue {
		// No attribute is recorded, so this is an element content context.
		name, err := sanitizerForElementContent(c)
		return appendIfNotEmpty([]string{}, name), err
	}

	// Attribute value context: only quoted values are accepted.
	switch c.delim {
	case delimDoubleQuote, delimSingleQuote:
		return sanitizersForAttributeValue(c)
	}
	// TODO: consider disallowing single-quoted or unquoted attribute values completely, even in hardcoded template text.
	return nil, fmt.Errorf("unquoted attribute values disallowed")
}
// appendIfNotEmpty appends every non-empty string in strings to slice, preserving
// their order, and returns the resulting slice. Empty strings are skipped.
//
// Note that the variadic parameter is named strings, so the strings package is
// shadowed inside this function.
func appendIfNotEmpty(slice []string, strings ...string) []string {
	for i := range strings {
		if strings[i] == "" {
			continue
		}
		slice = append(slice, strings[i])
	}
	return slice
}
// sanitizersForAttributeValue returns the names of the functions that will be
// called, in order, to sanitize data values found in the HTML attribute value
// context c.
//
// When c carries several candidate element or attribute names, every
// (element, attribute) combination must resolve to the same sanitization context;
// otherwise an error is returned.
func sanitizersForAttributeValue(c context) ([]string, error) {
	// Collect the candidate element and attribute names, falling back to the
	// single name when no list of alternatives is recorded.
	elemNames := c.element.names
	if len(elemNames) == 0 {
		elemNames = []string{c.element.name}
	}
	attrNames := c.attr.names
	if len(attrNames) == 0 {
		attrNames = []string{c.attr.name}
	}
	singleCombination := len(elemNames) == 1 && len(attrNames) == 1

	// The first combination establishes the baseline that every other
	// combination is compared against.
	var (
		baseline                   sanitizationContext
		baselineElem, baselineAttr string
		haveBaseline               bool
	)
	for _, e := range elemNames {
		for _, a := range attrNames {
			got, err := sanitizationContextForAttrVal(e, a, c.linkRel)
			if err != nil {
				if singleCombination {
					return nil, err
				}
				return nil, fmt.Errorf(`conditional branch with {element=%q, attribute=%q} results in sanitization error: %s`, e, a, err)
			}
			switch {
			case !haveBaseline:
				baseline, baselineElem, baselineAttr = got, e, a
				haveBaseline = true
			case got != baseline:
				return nil, fmt.Errorf(
					`conditional branches end in different attribute value sanitization contexts: {element=%q, attribute=%q} has sanitization context %q, {element=%q, attribute=%q} has sanitization context %q`,
					baselineElem, baselineAttr, baseline, e, a, got)
			}
		}
	}

	// prefix is the static attribute value text that precedes the action.
	prefix := c.attr.value
	if baseline.isEnum() && prefix != "" {
		return nil, fmt.Errorf("partial substitutions are disallowed in the %q attribute value context of a %q element", c.attr.name, c.element.name)
	}
	if baseline == sanitizationContextStyle && prefix != "" {
		if err := validateDoesNotEndsWithCharRefPrefix(prefix); err != nil {
			return nil, fmt.Errorf("action cannot be interpolated into the %q attribute value of this %q element: %s", c.attr.name, c.element.name, err)
		}
	}

	// stack holds the sanitizer names in reverse call order; it is reversed just
	// before being returned. sanitizeHTML is pushed first and therefore runs
	// last: every attribute value must be HTML-escaped at run time so that no
	// markup can make the HTML parser leave the attribute value state. The
	// browser's HTML parser later HTML-unescapes the value.
	stack := []string{sanitizeHTMLFuncName}
	primary := baseline.sanitizerName()
	if !baseline.isURLorTrustedResourceURL() {
		return reverse(appendIfNotEmpty(stack, primary)), nil
	}
	if prefix == "" {
		// With no static prefix, the whole URL or TrustedResourceURL attribute
		// value must be sanitized and normalized.
		return reverse(appendIfNotEmpty(stack, normalizeURLFuncName, primary)), nil
	}

	// From here on the action follows a URL or TrustedResourceURL prefix.
	if c.attr.ambiguousValue {
		return nil, fmt.Errorf("actions must not occur after an ambiguous URL prefix in the %q attribute value context of a %q element", c.attr.name, c.element.name)
	}
	validate, known := urlPrefixValidators[baseline]
	if !known {
		return nil, fmt.Errorf("cannot validate attribute value prefix %q in the %q sanitization context", prefix, baseline)
	}
	if err := validate(prefix); err != nil {
		return nil, fmt.Errorf("action cannot be interpolated into the %q URL attribute value of this %q element: %s", c.attr.name, c.element.name, err)
	}

	// Note that the context's own sanitizer (primary) is not pushed in this
	// branch; only the escaping/validation functions below are.
	if baseline == sanitizationContextTrustedResourceURL {
		// Untrusted data that occurs anywhere after a TrustedResourceURL prefix
		// must be query-escaped to prevent the injection of any new path
		// segments or URL components. Moreover, it must not contain any ".."
		// dot-segments.
		stack = append(stack, queryEscapeURLFuncName, validateTrustedResourceURLSubstitutionFuncName)
	} else if strings.ContainsAny(prefix, "#?") {
		// For URLs, query-escape only when the prefix already contains a query
		// or fragment, to prevent the injection of new query parameters or
		// fragments.
		stack = append(stack, queryEscapeURLFuncName)
	} else {
		stack = append(stack, normalizeURLFuncName)
	}
	return reverse(stack), nil
}
// reverse reverses the order of the elements of s in place and returns s.
func reverse(s []string) []string {
	n := len(s)
	for i := 0; i < n/2; i++ {
		// Swap the i-th element from the front with the i-th from the back.
		j := n - 1 - i
		s[i], s[j] = s[j], s[i]
	}
	return s
}
// sanitizationContextForAttrVal returns the sanitization context for the value of
// attr when it appears within element. linkRel is the value of the element's rel
// attribute and is only consulted for the href attribute of a link element.
//
// An error is returned when no sanitization context applies to this
// element/attribute pair.
func sanitizationContextForAttrVal(element, attr, linkRel string) (sanitizationContext, error) {
	isLinkHref := element == "link" && attr == "href"
	if isLinkHref {
		// Special case: safehtml.URL values are allowed in a link element's href
		// attribute if that element's rel attribute possesses certain values.
		for _, rel := range strings.Fields(linkRel) {
			if urlLinkRelVals[rel] {
				return sanitizationContextTrustedResourceURLOrURL, nil
			}
		}
	}

	// Special case: data-* attributes are specified by HTML5 to hold custom data
	// private to the page or application; they should not be interpreted by
	// browsers. Therefore, no sanitization is required for these attribute values.
	if dataAttributeNamePattern.MatchString(attr) {
		return sanitizationContextNone, nil
	}

	// An element-specific entry takes precedence over the global attribute table.
	perElement := elementSpecificAttrValSanitizationContext[attr]
	if specific, found := perElement[element]; found {
		return specific, nil
	}

	if global, attrKnown := globalAttrValSanitizationContext[attr]; attrKnown {
		// Only sanitize attributes that appear in elements whose semantics are
		// known. These attributes might have different semantics in other
		// standard or custom elements that our sanitization policy does not
		// handle correctly.
		_, elemKnown := elementContentSanitizationContext[element]
		if elemKnown || allowedVoidElements[element] {
			return global, nil
		}
	}
	return 0, fmt.Errorf("actions must not occur in the %q attribute value context of a %q element", attr, element)
}
// dataAttributeNamePattern matches valid data attribute names.
// A name matches when it consists of "data-", then a lowercase ASCII letter or an
// underscore, then any number of lowercase ASCII letters, digits, hyphens or
// underscores.
// This pattern is conservative and matches only a subset of the valid names defined in
// https://html.spec.whatwg.org/multipage/dom.html#embedding-custom-non-visible-data-with-the-data-*-attributes
var dataAttributeNamePattern = regexp.MustCompile(`^data-[a-z_][-a-z0-9_]*$`)
// endsWithCharRefPrefixPattern matches strings that end in an incomplete
// HTML character reference: a trailing "&", optionally followed by the beginning
// of a named reference (a letter, then letters or digits) or of a numeric
// reference ("#" with optional decimal digits, or "#x"/"#X" with optional
// hexadecimal digits), with no terminating ";".
//
// See https://html.spec.whatwg.org/multipage/syntax.html#character-references.
var endsWithCharRefPrefixPattern = regexp.MustCompile(
	`&(?:[[:alpha:]][[:alnum:]]*|#(?:[xX][[:xdigit:]]*|[[:digit:]]*))?$`)

// validateDoesNotEndsWithCharRefPrefix returns an error only if the given prefix ends
// with an incomplete HTML character reference; otherwise it returns nil.
//
// The error message suggests writing the escaped form "&amp;" in place of a bare
// "&", since a bare trailing ampersand is the usual cause of the failure.
func validateDoesNotEndsWithCharRefPrefix(prefix string) error {
	if endsWithCharRefPrefixPattern.MatchString(prefix) {
		return fmt.Errorf(`prefix %q ends with an incomplete HTML character reference; did you mean "&amp;" instead of "&"?`, prefix)
	}
	return nil
}
// sanitizerForElementContent returns the name of the function that will be called
// to sanitize data values found in the HTML element content context c.
//
// When c carries several candidate element names, all of them must resolve to
// the same element content sanitization context; otherwise an error is returned.
func sanitizerForElementContent(c context) (string, error) {
	// Use the list of alternative element names when present, otherwise the
	// single recorded name.
	candidates := c.element.names
	if len(candidates) == 0 {
		candidates = []string{c.element.name}
	}

	// The first candidate sets the context that the others are compared against.
	var (
		firstCtx  sanitizationContext
		firstName string
	)
	for idx, name := range candidates {
		var (
			current sanitizationContext
			err     error
		)
		if name != "" {
			current, err = sanitizationContextForElementContent(name)
		} else {
			// Special case: an empty element name represents a context outside
			// of an HTML element.
			current = sanitizationContextHTML
		}
		if err != nil {
			if len(candidates) > 1 {
				return "", fmt.Errorf(`conditional branch with element %q results in sanitization error: %s`, name, err)
			}
			return "", err
		}
		switch {
		case idx == 0:
			firstCtx, firstName = current, name
		case current != firstCtx:
			return "",
				fmt.Errorf(`conditional branches end in different element content sanitization contexts: element %q has sanitization context %q, element %q has sanitization context %q`,
					firstName, firstCtx, name, current)
		}
	}
	return firstCtx.sanitizerName(), nil
}
// sanitizationContextForElementContent looks up the element content sanitization
// context registered for element in elementContentSanitizationContext.
// It returns an error when element has no entry in that table.
func sanitizationContextForElementContent(element string) (sanitizationContext, error) {
	if found, known := elementContentSanitizationContext[element]; known {
		return found, nil
	}
	return 0, fmt.Errorf("actions must not occur in the element content context of a %q element", element)
}
// sanitizeHTMLComment ignores all of its arguments and always returns the empty
// string.
//
// Comment content does not correspond to any parsed structure or human-readable
// content, so the simplest and most secure policy is to drop anything
// interpolated into a comment. This holds whether or not static comment content
// is removed from the template.
func sanitizeHTMLComment(_ ...interface{}) string {
	const dropped = ""
	return dropped
}