Skip to content

Commit

Permalink
fix: use LittleEndian.Uint64 directly
Browse files Browse the repository at this point in the history
name              old time/op    new time/op    delta
ClickHouse128-12     148ns ± 1%     139ns ± 1%  -5.93%  (p=0.000 n=21+25)

name              old speed      new speed      delta
ClickHouse128-12  6.93GB/s ± 1%  7.36GB/s ± 1%  +6.29%  (p=0.000 n=21+25)
  • Loading branch information
tdakkota committed Dec 24, 2021
1 parent b2ad54e commit 8e81405
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 87 deletions.
42 changes: 22 additions & 20 deletions 128.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package city

import "encoding/binary"

// much faster than uint64[2]

// U128 is uint128.
Expand All @@ -23,18 +25,18 @@ func cityMurmur(s []byte, seed U128) U128 {

tmp := c
if length >= 8 {
tmp = u64(s)
tmp = binary.LittleEndian.Uint64(s)
}
d = shiftMix(a + tmp)
} else { // length > 16
c = hash16(u64(s[length-8:])+k1, a)
d = hash16(b+uint64(length), c+u64(s[length-16:]))
c = hash16(binary.LittleEndian.Uint64(s[length-8:])+k1, a)
d = hash16(b+uint64(length), c+binary.LittleEndian.Uint64(s[length-16:]))
a += d
for {
a ^= shiftMix(u64(s)*k1) * k1
a ^= shiftMix(binary.LittleEndian.Uint64(s)*k1) * k1
a *= k1
b ^= a
c ^= shiftMix(u64(s[8:])*k1) * k1
c ^= shiftMix(binary.LittleEndian.Uint64(s[8:])*k1) * k1
c *= k1
d ^= c
s = s[16:]
Expand Down Expand Up @@ -65,32 +67,32 @@ func Hash128Seed(s []byte, seed U128) U128 {
y := seed.High
z := uint64(len(s)) * k1

v.Low = rot64(y^k1, 49)*k1 + u64(s)
v.High = rot64(v.Low, 42)*k1 + u64(s[8:])
v.Low = rot64(y^k1, 49)*k1 + binary.LittleEndian.Uint64(s)
v.High = rot64(v.Low, 42)*k1 + binary.LittleEndian.Uint64(s[8:])
w.Low = rot64(y+z, 35)*k1 + x
w.High = rot64(x+u64(s[88:]), 53) * k1
w.High = rot64(x+binary.LittleEndian.Uint64(s[88:]), 53) * k1

// This is the same inner loop as Hash64(), manually unrolled.
for len(s) >= 128 {
// Roll 1.
x = rot64(x+y+v.Low+u64(s[8:]), 37) * k1
y = rot64(y+v.High+u64(s[48:]), 42) * k1
x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[8:]), 37) * k1
y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1
x ^= w.High
y += v.Low + u64(s[40:])
y += v.Low + binary.LittleEndian.Uint64(s[40:])
z = rot64(z+w.Low, 33) * k1
v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
w = weakHash32SeedsByte(s[32:], z+w.High, y+u64(s[16:]))
w = weakHash32SeedsByte(s[32:], z+w.High, y+binary.LittleEndian.Uint64(s[16:]))
z, x = x, z
s = s[64:]

// Roll 2.
x = rot64(x+y+v.Low+u64(s[8:]), 37) * k1
y = rot64(y+v.High+u64(s[48:]), 42) * k1
x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[8:]), 37) * k1
y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1
x ^= w.High
y += v.Low + u64(s[40:])
y += v.Low + binary.LittleEndian.Uint64(s[40:])
z = rot64(z+w.Low, 33) * k1
v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
w = weakHash32SeedsByte(s[32:], z+w.High, y+u64(s[16:]))
w = weakHash32SeedsByte(s[32:], z+w.High, y+binary.LittleEndian.Uint64(s[16:]))
z, x = x, z
s = s[64:]
}
Expand All @@ -105,9 +107,9 @@ func Hash128Seed(s []byte, seed U128) U128 {
for i := 0; i < len(s); {
i += 32
y = rot64(x+y, 42)*k0 + v.High
w.Low += u64(t[len(t)-i+16:])
w.Low += binary.LittleEndian.Uint64(t[len(t)-i+16:])
x = x*k0 + w.Low
z += w.High + u64(t[len(t)-i:])
z += w.High + binary.LittleEndian.Uint64(t[len(t)-i:])
w.High += v.Low
v = weakHash32SeedsByte(t[len(t)-i:], v.Low+z, v.High)
v.Low *= k0
Expand All @@ -133,8 +135,8 @@ func Hash128Seed(s []byte, seed U128) U128 {
func Hash128(s []byte) U128 {
if len(s) >= 16 {
return Hash128Seed(s[16:], U128{
Low: u64(s),
High: u64(s[8:]) + k0},
Low: binary.LittleEndian.Uint64(s),
High: binary.LittleEndian.Uint64(s[8:]) + k0},
)
}
return Hash128Seed(s, U128{Low: k0, High: k1})
Expand Down
57 changes: 26 additions & 31 deletions 64.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,6 @@ func bswap64(x uint64) uint64 {
((x & 0x00000000000000ff) << 56)
}

// u64 decodes the first 8 bytes of p as a little-endian uint64.
// It is a thin wrapper around binary.LittleEndian.Uint64, so p must hold
// at least 8 bytes.
func u64(p []byte) uint64 {
return binary.LittleEndian.Uint64(p)
}

// Bitwise right rotate.
func rot64(val uint64, shift uint) uint64 {
// Avoid shifting by 64: doing so yields an undefined result.
Expand Down Expand Up @@ -58,8 +53,8 @@ func hash16mul(u, v, mul uint64) uint64 {
func hash0to16(s []byte, length int) uint64 {
if length >= 8 {
mul := k2 + uint64(length)*2
a := u64(s) + k2
b := u64(s[length-8:])
a := binary.LittleEndian.Uint64(s) + k2
b := binary.LittleEndian.Uint64(s[length-8:])
c := rot64(b, 37)*mul + a
d := (rot64(a, 25) + b) * mul
return hash16mul(c, d, mul)
Expand Down Expand Up @@ -90,10 +85,10 @@ func hash0to16(s []byte, length int) uint64 {
// in that case
func hash17to32(s []byte, length int) uint64 {
mul := k2 + uint64(length)*2
a := u64(s) * k1
b := u64(s[8:])
c := u64(s[length-8:]) * mul
d := u64(s[length-16:]) * k2
a := binary.LittleEndian.Uint64(s) * k1
b := binary.LittleEndian.Uint64(s[8:])
c := binary.LittleEndian.Uint64(s[length-8:]) * mul
d := binary.LittleEndian.Uint64(s[length-16:]) * k2
return hash16mul(
rot64(a+b, 43)+rot64(c, 30)+d,
a+rot64(b+k2, 18)+c,
Expand All @@ -117,10 +112,10 @@ func weakHash32Seeds(w, x, y, z, a, b uint64) U128 {
func weakHash32SeedsByte(s []byte, a, b uint64) U128 {
_ = s[31]
return weakHash32Seeds(
u64(s[0:0+8:0+8]),
u64(s[8:8+8:8+8]),
u64(s[16:16+8:16+8]),
u64(s[24:24+8:24+8]),
binary.LittleEndian.Uint64(s[0:0+8:0+8]),
binary.LittleEndian.Uint64(s[8:8+8:8+8]),
binary.LittleEndian.Uint64(s[16:16+8:16+8]),
binary.LittleEndian.Uint64(s[24:24+8:24+8]),
a,
b,
)
Expand All @@ -129,14 +124,14 @@ func weakHash32SeedsByte(s []byte, a, b uint64) U128 {
// Return an 8-byte hash for 33 to 64 bytes.
func hash33to64(s []byte, length int) uint64 {
mul := k2 + uint64(length)*2
a := u64(s) * k2
b := u64(s[8:])
c := u64(s[length-24:])
d := u64(s[length-32:])
e := u64(s[16:]) * k2
f := u64(s[24:]) * 9
g := u64(s[length-8:])
h := u64(s[length-16:]) * mul
a := binary.LittleEndian.Uint64(s) * k2
b := binary.LittleEndian.Uint64(s[8:])
c := binary.LittleEndian.Uint64(s[length-24:])
d := binary.LittleEndian.Uint64(s[length-32:])
e := binary.LittleEndian.Uint64(s[16:]) * k2
f := binary.LittleEndian.Uint64(s[24:]) * 9
g := binary.LittleEndian.Uint64(s[length-8:])
h := binary.LittleEndian.Uint64(s[length-16:]) * mul
u := rot64(a+g, 43) + (rot64(b, 30)+c)*9
v := ((a + g) ^ d) + f + 1
w := bswap64((u+v)*mul) + h
Expand Down Expand Up @@ -169,23 +164,23 @@ func Hash64(s []byte) uint64 {

// For strings over 64 bytes we hash the end first, and then as we
// loop we keep 56 bytes of state: v, w, x, y and z.
x := u64(s[length-40:])
y := u64(s[length-16:]) + u64(s[length-56:])
z := hash16(u64(s[length-48:])+uint64(length), u64(s[length-24:]))
x := binary.LittleEndian.Uint64(s[length-40:])
y := binary.LittleEndian.Uint64(s[length-16:]) + binary.LittleEndian.Uint64(s[length-56:])
z := hash16(binary.LittleEndian.Uint64(s[length-48:])+uint64(length), binary.LittleEndian.Uint64(s[length-24:]))
v := weakHash32SeedsByte(s[length-64:], uint64(length), z)
w := weakHash32SeedsByte(s[length-32:], y+k1, x)
x = x*k1 + u64(s)
x = x*k1 + binary.LittleEndian.Uint64(s)

// Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
s = s[:nearestMultiple64(s)]
for len(s) > 0 {
x = rot64(x+y+v.Low+u64(s[8:]), 37) * k1
y = rot64(y+v.High+u64(s[48:]), 42) * k1
x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[8:]), 37) * k1
y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1
x ^= w.High
y += v.Low + u64(s[40:])
y += v.Low + binary.LittleEndian.Uint64(s[40:])
z = rot64(z+w.Low, 33) * k1
v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
w = weakHash32SeedsByte(s[32:], z+w.High, y+u64(s[16:]))
w = weakHash32SeedsByte(s[32:], z+w.High, y+binary.LittleEndian.Uint64(s[16:]))

z, x = x, z
s = s[64:]
Expand Down
36 changes: 19 additions & 17 deletions ch_128.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package city

import "encoding/binary"

// A subroutine for CH128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Murmur.
func chMurmur(s []byte, seed U128) U128 {
Expand All @@ -14,20 +16,20 @@ func chMurmur(s []byte, seed U128) U128 {
c = b*k1 + ch0to16(s, length)

if length >= 8 {
d = shiftMix(a + u64(s))
d = shiftMix(a + binary.LittleEndian.Uint64(s))
} else {
d = shiftMix(a + c)
}
} else { // length > 16
c = ch16(u64(s[length-8:])+k1, a)
d = ch16(b+uint64(length), c+u64(s[length-16:]))
c = ch16(binary.LittleEndian.Uint64(s[length-8:])+k1, a)
d = ch16(b+uint64(length), c+binary.LittleEndian.Uint64(s[length-16:]))
a += d

for {
a ^= shiftMix(u64(s)*k1) * k1
a ^= shiftMix(binary.LittleEndian.Uint64(s)*k1) * k1
a *= k1
b ^= a
c ^= shiftMix(u64(s[8:])*k1) * k1
c ^= shiftMix(binary.LittleEndian.Uint64(s[8:])*k1) * k1
c *= k1
d ^= c
s = s[16:]
Expand All @@ -47,15 +49,15 @@ func chMurmur(s []byte, seed U128) U128 {
func CH128(s []byte) U128 {
if len(s) >= 16 {
return CH128Seed(s[16:], U128{
Low: u64(s) ^ k3,
High: u64(s[8:]),
Low: binary.LittleEndian.Uint64(s) ^ k3,
High: binary.LittleEndian.Uint64(s[8:]),
})
}
if len(s) >= 8 {
l := uint64(len(s))
return CH128Seed(nil, U128{
Low: u64(s) ^ (l * k0),
High: u64(s[l-8:]) ^ k1,
Low: binary.LittleEndian.Uint64(s) ^ (l * k0),
High: binary.LittleEndian.Uint64(s[l-8:]) ^ k1,
})
}
return CH128Seed(s, U128{Low: k0, High: k1})
Expand All @@ -77,16 +79,16 @@ func CH128Seed(s []byte, seed U128) U128 {
y := seed.High
z := uint64(len(s)) * k1

v.Low = rot64(y^k1, 49)*k1 + u64(s)
v.High = rot64(v.Low, 42)*k1 + u64(s[8:])
v.Low = rot64(y^k1, 49)*k1 + binary.LittleEndian.Uint64(s)
v.High = rot64(v.Low, 42)*k1 + binary.LittleEndian.Uint64(s[8:])
w.Low = rot64(y+z, 35)*k1 + x
w.High = rot64(x+u64(s[88:]), 53) * k1
w.High = rot64(x+binary.LittleEndian.Uint64(s[88:]), 53) * k1

// This is the same inner loop as CH64(), manually unrolled.
for len(s) >= 128 {
// Roll 1.
x = rot64(x+y+v.Low+u64(s[16:]), 37) * k1
y = rot64(y+v.High+u64(s[48:]), 42) * k1
x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:]), 37) * k1
y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1

x ^= w.High
y ^= v.Low
Expand All @@ -98,8 +100,8 @@ func CH128Seed(s []byte, seed U128) U128 {
s = s[64:]

// Roll 2.
x = rot64(x+y+v.Low+u64(s[16:]), 37) * k1
y = rot64(y+v.High+u64(s[48:]), 42) * k1
x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:]), 37) * k1
y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1
x ^= w.High
y ^= v.Low

Expand All @@ -117,7 +119,7 @@ func CH128Seed(s []byte, seed U128) U128 {
for i := 0; i < len(s); {
i += 32
y = rot64(y-x, 42)*k0 + v.High
w.Low += u64(t[len(t)-i+16:])
w.Low += binary.LittleEndian.Uint64(t[len(t)-i+16:])
x = rot64(x, 49)*k0 + w.Low
w.Low += v.Low
v = weakHash32SeedsByte(t[len(t)-i:], v.Low, v.High)
Expand Down
40 changes: 21 additions & 19 deletions ch_64.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package city

import "encoding/binary"

// Ref:
// https://github.com/xzkostyan/python-cityhash/commit/f4091154ff2c6c0de11d5d6673b5007fdd6355ad

Expand All @@ -11,25 +13,25 @@ func ch16(u, v uint64) uint64 {

// Return an 8-byte hash for 33 to 64 bytes.
func ch33to64(s []byte, length int) uint64 {
z := u64(s[24:])
a := u64(s) + (uint64(length)+u64(s[length-16:]))*k0
z := binary.LittleEndian.Uint64(s[24:])
a := binary.LittleEndian.Uint64(s) + (uint64(length)+binary.LittleEndian.Uint64(s[length-16:]))*k0
b := rot64(a+z, 52)
c := rot64(a, 37)

a += u64(s[8:])
a += binary.LittleEndian.Uint64(s[8:])
c += rot64(a, 7)
a += u64(s[16:])
a += binary.LittleEndian.Uint64(s[16:])

vf := a + z
vs := b + rot64(a, 31) + c

a = u64(s[16:]) + u64(s[length-32:])
z = u64(s[length-8:])
a = binary.LittleEndian.Uint64(s[16:]) + binary.LittleEndian.Uint64(s[length-32:])
z = binary.LittleEndian.Uint64(s[length-8:])
b = rot64(a+z, 52)
c = rot64(a, 37)
a += u64(s[length-24:])
a += binary.LittleEndian.Uint64(s[length-24:])
c += rot64(a, 7)
a += u64(s[length-16:])
a += binary.LittleEndian.Uint64(s[length-16:])

wf := a + z
ws := b + rot64(a, 31) + c
Expand All @@ -38,10 +40,10 @@ func ch33to64(s []byte, length int) uint64 {
}

func ch17to32(s []byte, length int) uint64 {
a := u64(s) * k1
b := u64(s[8:])
c := u64(s[length-8:]) * k2
d := u64(s[length-16:]) * k0
a := binary.LittleEndian.Uint64(s) * k1
b := binary.LittleEndian.Uint64(s[8:])
c := binary.LittleEndian.Uint64(s[length-8:]) * k2
d := binary.LittleEndian.Uint64(s[length-16:]) * k0
return hash16(
rot64(a-b, 43)+rot64(c, 30)+d,
a+rot64(b^k3, 20)-c+uint64(length),
Expand All @@ -50,8 +52,8 @@ func ch17to32(s []byte, length int) uint64 {

func ch0to16(s []byte, length int) uint64 {
if length > 8 {
a := u64(s)
b := u64(s[length-8:])
a := binary.LittleEndian.Uint64(s)
b := binary.LittleEndian.Uint64(s[length-8:])
return ch16(a, rot64(b+uint64(length), uint(length))) ^ b
}
if length >= 4 {
Expand Down Expand Up @@ -82,9 +84,9 @@ func CH64(s []byte) uint64 {
return ch33to64(s, length)
}

x := u64(s)
y := u64(s[length-16:]) ^ k1
z := u64(s[length-56:]) ^ k0
x := binary.LittleEndian.Uint64(s)
y := binary.LittleEndian.Uint64(s[length-16:]) ^ k1
z := binary.LittleEndian.Uint64(s[length-56:]) ^ k0

v := weakHash32SeedsByte(s[length-64:], uint64(length), y)
w := weakHash32SeedsByte(s[length-32:], uint64(length)*k1, k0)
Expand All @@ -95,8 +97,8 @@ func CH64(s []byte) uint64 {
// Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
s = s[:nearestMultiple64(s)]
for len(s) > 0 {
x = rot64(x+y+v.Low+u64(s[16:]), 37) * k1
y = rot64(y+v.High+u64(s[48:]), 42) * k1
x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:]), 37) * k1
y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:]), 42) * k1

x ^= w.High
y ^= v.Low
Expand Down

0 comments on commit 8e81405

Please sign in to comment.