-
Notifications
You must be signed in to change notification settings - Fork 646
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
零拷贝实现string 和bytes的转换疑问 #7
Comments
// string2bytes reinterprets the address of s as a pointer to a []byte and
// dereferences it, so no bytes are copied.
// NOTE(review): a string header has two words (Data, Len) while a slice
// header has three (Data, Len, Cap); the Cap word read here lies past the
// string header and is never set by this function.
func string2bytes(s string) []byte {
return *(*[]byte)(unsafe.Pointer(&s))
} 附:性能对比 // main.go
package main
import (
"reflect"
"unsafe"
)
// string2bytes1 returns a []byte that shares the storage of s rather than
// copying it. It views s through a reflect.StringHeader and the result through
// a reflect.SliceHeader, then copies the data address and uses the string
// length for both the length and the capacity of the slice.
func string2bytes1(s string) []byte {
	var out []byte
	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&out))
	dst.Data = src.Data
	dst.Cap = src.Len
	dst.Len = src.Len
	return out
}
// string2bytes2 returns a []byte view of s without copying its bytes.
//
// A string header holds two words (data pointer, length) while a slice header
// holds three (data pointer, length, capacity). Dereferencing &s directly as a
// *[]byte reads the capacity from whatever happens to follow s in memory, so
// cap() of the result is arbitrary. To avoid that, the string is embedded in a
// struct followed by an explicit capacity word, which gives the overlay the
// full three-word slice layout with cap == len(s).
//
// The result aliases the string's storage and must not be written to.
func string2bytes2(s string) []byte {
	hdr := struct {
		string
		capacity int
	}{s, len(s)}
	return *(*[]byte)(unsafe.Pointer(&hdr))
} // main_test.go
package main
import (
"fmt"
"math/rand"
"reflect"
"testing"
"time"
)
// TestString2Bytes checks that string2bytes2 round-trips a sample string and
// that string2bytes1 and string2bytes2 produce deeply equal slices for it.
func TestString2Bytes(t *testing.T) {
	const sample = "qcrao/Go-Questions"

	roundTrip := string(string2bytes2(sample))
	if roundTrip != sample {
		t.Fatalf("string2bytes2 is not properly implemented")
	}

	viaHeaders, viaCast := string2bytes1(sample), string2bytes2(sample)
	if same := reflect.DeepEqual(viaHeaders, viaCast); !same {
		t.Fatalf("strings2bytes implementation does not match")
	}
}
// init seeds the package-level math/rand source with the current time in
// nanoseconds.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
// letterRunes is the alphabet genstring draws from: lower- and upper-case
// ASCII letters.
var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

// genstring returns a string of n runes, each picked from letterRunes with
// rand.Intn.
func genstring(n int) string {
	out := make([]rune, n)
	alphabet := len(letterRunes)
	for i := 0; i < len(out); i++ {
		out[i] = letterRunes[rand.Intn(alphabet)]
	}
	return string(out)
}
// BenchmarkString2Bytes runs each conversion function as a sub-benchmark named
// "<function>-<length>" against a random string of that length.
// NOTE(review): funcs is a map, so the order in which the two functions are
// benchmarked is not fixed from run to run.
func BenchmarkString2Bytes(b *testing.B) {
funcs := map[string]func(string) []byte{
"string2bytes1": string2bytes1,
"string2bytes2": string2bytes2,
}
for name, f := range funcs {
// Input lengths are 1, 10, 100 and 1000.
// NOTE(review): the results listed after this snippet include a 10000 case,
// which `i < 10000` does not produce; confirm whether `<=` was intended.
for i := 1; i < 10000; i *= 10 {
s := genstring(i)
b.Run(fmt.Sprintf("%v-%v", name, i), func(b *testing.B) {
// NOTE(review): the result of f(s) is discarded; presumably the compiler
// may optimize the call away, so verify the timings measure real work.
for i := 0; i < b.N; i++ {
f(s)
}
})
}
}
} $ go test -v -run=none -bench=. -benchmem -count=10 . | tee bench.txt
$ benchstat bench.txt
name time/op
String2Bytes/string2bytes1-1-12 3.07ns ± 1%
String2Bytes/string2bytes1-10-12 3.08ns ± 2%
String2Bytes/string2bytes1-100-12 3.08ns ± 1%
String2Bytes/string2bytes1-1000-12 3.08ns ± 0%
String2Bytes/string2bytes1-10000-12 3.07ns ± 1%
String2Bytes/string2bytes2-1-12 1.95ns ± 2%
String2Bytes/string2bytes2-10-12 1.95ns ± 2%
String2Bytes/string2bytes2-100-12 1.94ns ± 1%
String2Bytes/string2bytes2-1000-12 1.95ns ± 1%
String2Bytes/string2bytes2-10000-12 1.96ns ± 3%
name alloc/op
String2Bytes/string2bytes1-1-12 0.00B
String2Bytes/string2bytes1-10-12 0.00B
String2Bytes/string2bytes1-100-12 0.00B
String2Bytes/string2bytes1-1000-12 0.00B
String2Bytes/string2bytes1-10000-12 0.00B
String2Bytes/string2bytes2-1-12 0.00B
String2Bytes/string2bytes2-10-12 0.00B
String2Bytes/string2bytes2-100-12 0.00B
String2Bytes/string2bytes2-1000-12 0.00B
String2Bytes/string2bytes2-10000-12 0.00B |
@changkun string2bytes2 转换函数严格意义上来讲是错误的,因为转换的时候并未正常给 cap 赋值。 package main
import (
"fmt"
"reflect"
"runtime"
"unsafe"
)
// string2bytes1 builds a []byte that shares the storage of s instead of
// copying it. The string's data address is copied into the slice header and
// the string length is used for both len and cap. runtime.KeepAlive(s) marks s
// as in use until the header fields have been copied.
func string2bytes1(s string) []byte {
	var view []byte
	from := (*reflect.StringHeader)(unsafe.Pointer(&s))
	to := (*reflect.SliceHeader)(unsafe.Pointer(&view))
	to.Data = from.Data
	to.Cap = from.Len
	to.Len = from.Len
	runtime.KeepAlive(s)
	return view
}
// string2bytes2 reinterprets &s as a *[]byte and dereferences it, copying
// nothing. Only the first two words of the resulting slice header (Data, Len)
// are backed by s; the third word (Cap) is read from memory that this function
// never sets.
func string2bytes2(s string) []byte {
return *(*[]byte)(unsafe.Pointer(&s))
}
// main converts the literal "Roger" with string2bytes1 and then with
// string2bytes2, printing each resulting slice followed by its len and cap so
// the two conversions can be compared.
func main() {
s1 := string2bytes1("Roger")
fmt.Println(s1)
fmt.Println(len(s1))
fmt.Println(cap(s1))
s2 := string2bytes2("Roger")
fmt.Println(s2)
fmt.Println(len(s2))
fmt.Println(cap(s2))
} s2 的 cap 输出将会是一个随机值。
|
@luojiego 不好意思,我认为这是实现者的决策,而不是正确与否的问题。如果我们要讨论「严格意义」上说,你不应该做这种实现,要么老老实实带拷贝的转换,要么用标准库 另外, |
OK,非常感谢! |
强转的一个问题是转换后的 byte slice cap 很大,这个是不好的,比如 https://play.golang.org/p/_tqfAgxlZAv ,所以简单粗暴的强转不可取,因为无法拿到 byte slice 的 cap,一个性能较好的实现是 fasthttp 的( https://github.com/valyala/fasthttp/blob/c48d3735fa9864a7c1724168812f3571c8313581/bytesconv.go#L387 )。 |
为什么 cap 值会这么大?从汇编代码看貌似 cap 值为字符串的 Data 的地址值,但又不是稳定复现的 |
// StringHeader is the runtime representation of a string.
// It cannot be used safely or portably and its representation may
// change in a later release.
// Moreover, the Data field is not sufficient to guarantee the data
// it references will not be garbage collected, so programs must keep
// a separate, correctly typed pointer to the underlying data.
type StringHeader struct {
// Data is the address of the string's bytes, held as a plain integer.
Data uintptr
// Len is the string's length.
Len int
}
// SliceHeader is the runtime representation of a slice.
// It cannot be used safely or portably and its representation may
// change in a later release.
// Moreover, the Data field is not sufficient to guarantee the data
// it references will not be garbage collected, so programs must keep
// a separate, correctly typed pointer to the underlying data.
type SliceHeader struct {
// Data is the address of the slice's backing storage, held as a plain integer.
Data uintptr
// Len is the slice's length.
Len int
// Cap is the slice's capacity; it is the one field StringHeader has no
// counterpart for.
Cap int
} |
1. 这里的 Data 是一个 uintptr 整型,把 stringHeader.Data 按值拷贝之后,GC 不会移动或者回收该 uintptr 指向的内存吗?
2,在官方文档里面的描述是这样的:
你的转换函数是不是换成下面的更好?
The text was updated successfully, but these errors were encountered: