Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

strings: add a string factory for safe, allocation-limited conversion of bytes to strings #32594

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
94 changes: 93 additions & 1 deletion src/strings/builder.go
Expand Up @@ -5,6 +5,7 @@
package strings

import (
"sync/atomic"
"unicode/utf8"
"unsafe"
)
Expand Down Expand Up @@ -44,7 +45,11 @@ func (b *Builder) copyCheck() {

// String returns the accumulated string.
//
// The conversion goes through bytes2String, which reinterprets the builder's
// buffer as a string instead of copying it.
func (b *Builder) String() string {
	return bytes2String(b.buf)
}

// bytes2String reinterprets the slice header of bytes as a string header and
// returns the resulting string without copying the underlying data. The
// returned string shares memory with bytes, so the slice contents must not
// be modified afterwards.
func bytes2String(bytes []byte) string {
	header := unsafe.Pointer(&bytes)
	return *(*string)(header)
}

// Len returns the number of accumulated bytes; b.Len() == len(b.String()).
Expand Down Expand Up @@ -122,3 +127,90 @@ func (b *Builder) WriteString(s string) (int, error) {
b.buf = append(b.buf, s...)
return len(s), nil
}

// DefaultFactoryPoolSize is the pool size, in bytes, that NewFactory passes
// to NewFactoryWithPoolSize; it is also the length of the tape backing the
// package-level New function.
const DefaultFactoryPoolSize = 4096

// Factory produces immutable strings from byte slices. Small inputs are
// copied into a pooled buffer so that many strings share one allocation.
// A Factory performs no synchronization of its own.
type Factory struct {
	// b is the pooled buffer that New appends content to.
	b Builder
}

// NewFactory returns a Factory whose pool is sized with
// DefaultFactoryPoolSize.
func NewFactory() *Factory {
	factory := NewFactoryWithPoolSize(DefaultFactoryPoolSize)
	return factory
}

// NewFactoryWithPoolSize returns a Factory whose pool is pre-grown by size
// bytes. The pool size only determines how many strings can share one
// allocation, which limits memory fragmentation; it does not cap the length
// of the strings the Factory can produce.
func NewFactoryWithPoolSize(size int) *Factory {
	factory := new(Factory)
	factory.b.Grow(size)
	return factory
}

// New returns an immutable string holding a copy of content.
//
// Content that occupies at most half of the pool capacity is appended to the
// pooled buffer and returned as a substring of it, so consecutive small
// strings share a single allocation. Larger content is converted with a
// plain string(content) copy. When the pooled buffer has no room left for
// content it is replaced by a fresh buffer of the same capacity.
//
// New does no locking of its own, so a Factory must not be shared between
// goroutines without external synchronization.
func (f *Factory) New(content []byte) string {
	poolCap := f.b.Cap()

	// Compare against half the capacity rather than doubling len(content):
	// the doubling can overflow int for very large slices on 32-bit
	// platforms, which would wrongly route huge content into the pool.
	if len(content) > poolCap/2 {
		return string(content)
	}

	if len(content) > poolCap-f.b.Len() {
		// Reset drops the current buffer instead of truncating it, so the
		// strings already handed out keep their bytes untouched.
		f.b.Reset()
		f.b.Grow(poolCap)
	}

	start := f.b.Len()
	// Builder.Write always reports a nil error, so its result is not checked.
	f.b.Write(content)
	return f.b.String()[start:]
}

// syncTape is a fixed-size byte arena for internal use by the package-level
// New function (see globalFactory); fragments are carved out of it by alloc.
type syncTape struct {
	// tPtr is the offset of the first unreserved byte of tape. It is only
	// accessed through sync/atomic and is kept as the first field so that it
	// is 64-bit aligned on 32-bit platforms regardless of the tape length.
	tPtr int64
	// tape is the backing storage that reserved fragments are sliced from.
	tape [DefaultFactoryPoolSize]byte
}

// alloc reserves size bytes of the tape and returns them as a slice together
// with true. The reservation is a single atomic add on the tape offset. When
// the reservation would run past the end of the tape, alloc pins the offset
// at the tape length and returns nil and false.
func (st *syncTape) alloc(size int) ([]byte, bool) {
	limit := int64(len(st.tape))
	want := int64(size)

	hi := atomic.AddInt64(&st.tPtr, want)
	if hi <= limit {
		lo := hi - want
		return st.tape[lo:hi], true
	}

	// Clamp the offset so that repeated failed reservations cannot keep
	// increasing it until it overflows.
	atomic.StoreInt64(&st.tPtr, limit)
	return nil, false
}

// globalFactory holds the *syncTape currently used by the package-level New
// function; it is empty until New stores the first tape.
var globalFactory atomic.Value

// New returns an immutable string holding a copy of the mutable content.
//
// Content of at most DefaultFactoryPoolSize/2 bytes is copied into a shared
// tape so that many small strings share one allocation; larger content is
// converted with a plain string(content) copy. Tape space is reserved with
// atomic operations and the current tape is published through an
// atomic.Value, so New is intended to be called from multiple goroutines.
func New(content []byte) string {
	size := len(content)
	if size == 0 {
		// Nothing to copy: do not allocate or replace a tape for it.
		return ""
	}

	// Compare against half the pool size rather than doubling size: the
	// doubling can overflow int for very large slices on 32-bit platforms.
	if size > DefaultFactoryPoolSize/2 {
		return string(content)
	}

	if tape, ok := globalFactory.Load().(*syncTape); ok {
		if frag, ok := tape.alloc(size); ok {
			copy(frag, content)
			return bytes2String(frag)
		}
	}

	// No tape has been stored yet, or the current one is exhausted: start a
	// fresh tape and reserve this fragment before publishing it.
	tape := &syncTape{}
	frag, ok := tape.alloc(size)
	if !ok {
		// Not expected while size is at most half of the tape; fall back to
		// a plain copy instead of returning a truncated string.
		return string(content)
	}
	globalFactory.Store(tape)
	copy(frag, content)
	return bytes2String(frag)
}
126 changes: 126 additions & 0 deletions src/strings/builder_test.go
Expand Up @@ -6,6 +6,8 @@ package strings_test

import (
"bytes"
"math/rand"
"strconv"
. "strings"
"testing"
)
Expand Down Expand Up @@ -364,3 +366,127 @@ func BenchmarkBuildString_ByteBuffer(b *testing.B) {
}
})
}

// randomString returns a string of l bytes, each picked with rand.Intn from
// a fixed alphabet of ASCII letters and digits. It returns the empty string
// when l is zero or negative.
func randomString(l int) string {
	const material = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz1234567890"
	if l <= 0 {
		return ""
	}

	// The final length is known, so size the buffer once instead of letting
	// append grow it repeatedly.
	buff := make([]byte, l)
	for i := range buff {
		buff[i] = material[rand.Intn(len(material))]
	}
	return string(buff)
}

// TestFactoryNewString checks that Factory.New returns a string equal to its
// input, first for content larger than the pool and then for a long run of
// small inputs.
func TestFactoryNewString(t *testing.T) {
	const mismatch = "equality assuming hasn't been satisfied"

	f := NewFactoryWithPoolSize(1024)

	// 4096 bytes is larger than the 1024-byte pool requested above.
	if want := randomString(4096); f.New([]byte(want)) != want {
		t.Error(mismatch)
	}

	// 1024 strings of 64 bytes add up to far more than the pool size.
	for i := 0; i < 1024; i++ {
		want := randomString(64)
		if got := f.New([]byte(want)); got != want {
			t.Error(mismatch)
		}
	}
}

// TestNewString checks that the package-level New returns a string equal to
// its input, first for one large input and then from 512 parallel subtests
// that each convert 1024 short strings.
func TestNewString(t *testing.T) {
	const mismatch = "equality assuming hasn't been satisfied"

	if want := randomString(4096); New([]byte(want)) != want {
		t.Error(mismatch)
	}

	// worker is the body shared by every parallel subtest.
	worker := func(t *testing.T) {
		t.Parallel()

		for i := 0; i < 1024; i++ {
			want := randomString(8)
			if got := New([]byte(want)); got != want {
				t.Error(mismatch)
			}
		}
	}

	for n := 0; n < 512; n++ {
		t.Run(strconv.Itoa(n), worker)
	}
}

// BenchmarkFactoryNewString compares the built-in string(content) conversion
// ("oldway") with Factory.New ("factory"), performing 100 conversions per
// benchmark iteration.
func BenchmarkFactoryNewString(b *testing.B) {
	payload := []byte("hello world!")
	sink := func(str string) {}

	b.Run("oldway", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			for rep := 0; rep < 100; rep++ {
				s := string(payload)
				sink(s)
			}
		}
	})

	b.Run("factory", func(b *testing.B) {
		f := NewFactory()
		for n := 0; n < b.N; n++ {
			for rep := 0; rep < 100; rep++ {
				s := f.New(payload)
				sink(s)
			}
		}
	})
}

// BenchmarkNewString compares the built-in string(content) conversion with
// the package-level New, both from a single goroutine ("-sync") and through
// b.RunParallel with a parallelism of 2 ("-async"). Every iteration performs
// 100 conversions.
func BenchmarkNewString(b *testing.B) {
	payload := []byte("hello world!")
	sink := func(str string) {}

	b.Run("oldway-sync", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			for rep := 0; rep < 100; rep++ {
				s := string(payload)
				sink(s)
			}
		}
	})

	b.Run("factory-sync", func(b *testing.B) {
		for n := 0; n < b.N; n++ {
			for rep := 0; rep < 100; rep++ {
				s := New(payload)
				sink(s)
			}
		}
	})

	b.Run("oldway-async", func(b *testing.B) {
		b.SetParallelism(2)
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				for rep := 0; rep < 100; rep++ {
					s := string(payload)
					sink(s)
				}
			}
		})
	})

	b.Run("factory-async", func(b *testing.B) {
		b.SetParallelism(2)
		b.RunParallel(func(pb *testing.PB) {
			for pb.Next() {
				for rep := 0; rep < 100; rep++ {
					s := New(payload)
					sink(s)
				}
			}
		})
	})
}