diff --git a/src/html/escape.go b/src/html/escape.go
index f50a4b937a79b..ab6fd1c7b4b34 100644
--- a/src/html/escape.go
+++ b/src/html/escape.go
@@ -57,8 +57,9 @@ var replacementTable = [...]rune{
// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// Precondition: b[src] == '&' && dst <= src.
-// attribute should be true if parsing an attribute value.
-func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
+func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) {
+ const attribute = false
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
// i starts at 1 because we already know that s[0] == '&'.
@@ -139,14 +140,14 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
break
}
- entityName := string(s[1:i])
- if entityName == "" {
+ entityName := s[1:i]
+ if len(entityName) == 0 {
// No-op.
} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
// No-op.
- } else if x := entity[entityName]; x != 0 {
+ } else if x := entity[string(entityName)]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + i
- } else if x := entity2[entityName]; x[0] != 0 {
+ } else if x := entity2[string(entityName)]; x[0] != 0 {
dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
} else if !attribute {
@@ -155,7 +156,7 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
maxLen = longestEntityWithoutSemicolon
}
for j := maxLen; j > 1; j-- {
- if x := entity[entityName[:j]]; x != 0 {
+ if x := entity[string(entityName[:j])]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
}
}
@@ -166,26 +167,6 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
return dst1, src1
}
-// unescape unescapes b's entities in-place, so that "a<b" becomes "a 0 {
+ if s[src] == '&' {
+ i = 0
+ } else {
+ i = strings.IndexByte(s[src:], '&')
+ }
+ if i < 0 {
+ dst += copy(b[dst:], s[src:])
+ break
+ }
+
+ if i > 0 {
+ copy(b[dst:], s[src:src+i])
+ }
+ dst, src = unescapeEntity(b, dst+i, src+i)
+ }
+ return string(b[:dst])
}
diff --git a/src/html/escape_test.go b/src/html/escape_test.go
index 3702626a3dccf..8b51a55409fa5 100644
--- a/src/html/escape_test.go
+++ b/src/html/escape_test.go
@@ -118,8 +118,10 @@ func TestUnescapeEscape(t *testing.T) {
}
var (
-	benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100)
-	benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100)
+	benchEscapeData     = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100) // every group contains a character that must be escaped
+	benchEscapeNone     = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100)  // nothing to escape or unescape
+	benchUnescapeSparse = strings.Repeat(strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 10)+"&amp;", 10) // 400 entity-free bytes, then one "&amp;", ten times over
+	benchUnescapeDense  = strings.Repeat("&amp;&lt; &amp; &lt;", 100) // entities separated by at most a single space
)
func BenchmarkEscape(b *testing.B) {
@@ -151,3 +153,17 @@ func BenchmarkUnescapeNone(b *testing.B) {
n += len(UnescapeString(s))
}
}
+
+func BenchmarkUnescapeSparse(b *testing.B) { // runs UnescapeString over benchUnescapeSparse b.N times
+	var total int // running sum of the unescaped lengths
+	for remaining := b.N; remaining > 0; remaining-- {
+		total += len(UnescapeString(benchUnescapeSparse))
+	}
+}
+
+func BenchmarkUnescapeDense(b *testing.B) { // runs UnescapeString over benchUnescapeDense b.N times
+	var total int // running sum of the unescaped lengths
+	for remaining := b.N; remaining > 0; remaining-- {
+		total += len(UnescapeString(benchUnescapeDense))
+	}
+}