diff --git a/src/html/escape.go b/src/html/escape.go index f50a4b937a79b..ab6fd1c7b4b34 100644 --- a/src/html/escape.go +++ b/src/html/escape.go @@ -57,8 +57,9 @@ var replacementTable = [...]rune{ // unescapeEntity reads an entity like "<" from b[src:] and writes the // corresponding "<" to b[dst:], returning the incremented dst and src cursors. // Precondition: b[src] == '&' && dst <= src. -// attribute should be true if parsing an attribute value. -func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { +func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) { + const attribute = false + // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference // i starts at 1 because we already know that s[0] == '&'. @@ -139,14 +140,14 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { break } - entityName := string(s[1:i]) - if entityName == "" { + entityName := s[1:i] + if len(entityName) == 0 { // No-op. } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' { // No-op. - } else if x := entity[entityName]; x != 0 { + } else if x := entity[string(entityName)]; x != 0 { return dst + utf8.EncodeRune(b[dst:], x), src + i - } else if x := entity2[entityName]; x[0] != 0 { + } else if x := entity2[string(entityName)]; x[0] != 0 { dst1 := dst + utf8.EncodeRune(b[dst:], x[0]) return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i } else if !attribute { @@ -155,7 +156,7 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { maxLen = longestEntityWithoutSemicolon } for j := maxLen; j > 1; j-- { - if x := entity[entityName[:j]]; x != 0 { + if x := entity[string(entityName[:j])]; x != 0 { return dst + utf8.EncodeRune(b[dst:], x), src + j + 1 } } @@ -166,26 +167,6 @@ func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { return dst1, src1 } -// unescape unescapes b's entities in-place, so that "a<b" becomes "a 0 { + if s[src] == '&' { + i = 0 + } else { + i = strings.IndexByte(s[src:], '&') + } + if i < 0 { + dst += copy(b[dst:], s[src:]) + break + } + + if i > 0 { + copy(b[dst:], s[src:src+i]) + } + dst, src = unescapeEntity(b, dst+i, src+i) + } + return string(b[:dst]) } diff --git a/src/html/escape_test.go b/src/html/escape_test.go index 3702626a3dccf..8b51a55409fa5 100644 --- a/src/html/escape_test.go +++ b/src/html/escape_test.go @@ -118,8 +118,10 @@ func TestUnescapeEscape(t *testing.T) { } var ( - benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100) - benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100) + benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100) + benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100) + benchUnescapeSparse = strings.Repeat(strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 10)+"&", 10) + benchUnescapeDense = strings.Repeat("&< & <", 100) ) func BenchmarkEscape(b *testing.B) { @@ -151,3 +153,17 @@ func BenchmarkUnescapeNone(b *testing.B) { n += len(UnescapeString(s)) } } + +func BenchmarkUnescapeSparse(b *testing.B) { + n := 0 + for i := 0; i < b.N; i++ { + n += len(UnescapeString(benchUnescapeSparse)) + } +} + +func BenchmarkUnescapeDense(b *testing.B) { + n := 0 + for i := 0; i < b.N; i++ { + n += len(UnescapeString(benchUnescapeDense)) + } +}