Skip to content

Commit

Permalink
unicode/norm: lazy load recompMap from string
Browse files Browse the repository at this point in the history
recompMap gets populated on init, which requires 21kB of memory.
Instead populate it only when we need it.

Storing the table as a string constant puts the data in the rodata section
of the binary, which means that the Go compiler can apply better
optimizations to it. The string is an alternating series of 4-byte keys and
4-byte values, which get built into a map the first time they are required.

Updates golang/go#26752.

Change-Id: I4f9b7d74af6df42b58999cf42cf5ada9646aa98d
Reviewed-on: https://go-review.googlesource.com/127926
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
  • Loading branch information
kevinburke authored and bradfitz committed Aug 7, 2018
1 parent cb67308 commit 2378460
Show file tree
Hide file tree
Showing 4 changed files with 1,921 additions and 1,889 deletions.
14 changes: 14 additions & 0 deletions unicode/norm/forminfo.go
Expand Up @@ -4,6 +4,8 @@

package norm

import "encoding/binary"

// This file contains Form-specific logic and wrappers for data in tables.go.

// Rune info is stored in a separate trie per composing form. A composing form
Expand Down Expand Up @@ -178,6 +180,17 @@ func (p Properties) TrailCCC() uint8 {
return ccc[p.tccc]
}

// buildRecompMap populates recompMap from recompMapPacked.
//
// The packed string is read as consecutive 8-byte records: a big-endian
// 4-byte key followed by a big-endian 4-byte value, which is stored as the
// rune for that key.
func buildRecompMap() {
	const recordSize = 8 // 4-byte key + 4-byte value

	recompMap = make(map[uint32]rune, len(recompMapPacked)/recordSize)

	// Scratch space: each record is copied out of the string before decoding.
	var record [recordSize]byte
	for off := 0; off < len(recompMapPacked); off += recordSize {
		copy(record[:], recompMapPacked[off:off+recordSize])
		composed := rune(binary.BigEndian.Uint32(record[4:]))
		recompMap[binary.BigEndian.Uint32(record[:4])] = composed
	}
}

// Recomposition
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
// This clips off the bits of three entries, but we know this will not
Expand All @@ -188,6 +201,7 @@ func (p Properties) TrailCCC() uint8 {
// combine returns the rune stored in recompMap for the pair (a, b), or 0 if
// the pair has no entry. The lookup key holds the low 16 bits of a in its
// upper half and the low 16 bits of b in its lower half. recompMap is built
// on the first call via recompMapOnce.
func combine(a, b rune) rune {
	recompMapOnce.Do(buildRecompMap)

	hi := uint32(uint16(a)) << 16
	lo := uint32(uint16(b))
	return recompMap[hi+lo]
}

Expand Down
16 changes: 13 additions & 3 deletions unicode/norm/maketables.go
Expand Up @@ -12,6 +12,7 @@ package main

import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"io"
Expand Down Expand Up @@ -735,6 +736,8 @@ func makeTables() {
max = n
}
}
fmt.Fprintln(w, `import "sync"`)
fmt.Fprintln(w)

fmt.Fprintln(w, "const (")
fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
Expand Down Expand Up @@ -782,16 +785,23 @@ func makeTables() {
sz := nrentries * 8
size += sz
fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
fmt.Fprintln(w, "var recompMap = map[uint32]rune{")
fmt.Fprintln(w, "var recompMap map[uint32]rune")
fmt.Fprintln(w, "var recompMapOnce sync.Once\n")
fmt.Fprintln(w, `const recompMapPacked = "" +`)
var buf [8]byte
for i, c := range chars {
f := c.forms[FCanonical]
d := f.decomp
if !f.isOneWay && len(d) > 0 {
key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i)
binary.BigEndian.PutUint32(buf[:4], key)
binary.BigEndian.PutUint32(buf[4:], uint32(i))
fmt.Fprintf(w, "\t\t%q + // 0x%.8X: 0x%.8X\n", string(buf[:]), key, uint32(i))
}
}
fmt.Fprintf(w, "}\n\n")
// hack so we don't have to special case the trailing plus sign
fmt.Fprintf(w, ` ""`)
fmt.Fprintln(w)
}

fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
Expand Down

0 comments on commit 2378460

Please sign in to comment.