Skip to content

Commit

Permalink
Fix hash table size -- should be a power of two!
Browse files Browse the repository at this point in the history
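As it was, the table had 100000 buckets (not a power of two), so the mask in
hash & uint64(len(items)-1) was 99999, which has only 10 bits set. That meant
only 1024 of the 100000 entries could ever be picked as the initial slot, so we
were only using a few of the entries in the table, causing many more collisions!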

Thanks Arnaud!
  • Loading branch information
benhoyt committed Mar 9, 2024
1 parent 3eeb7f1 commit e9fe448
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
11 changes: 6 additions & 5 deletions r7.go
Expand Up @@ -28,8 +28,9 @@ func r7(inputPath string, output io.Writer) error {
key []byte
stat *stats
}
items := make([]item, 100000) // hash buckets, linearly probed
size := 0 // number of active items in items slice
const numBuckets = 1 << 17 // number of hash buckets (power of 2)
items := make([]item, numBuckets) // hash buckets, linearly probed
size := 0 // number of active items in items slice

buf := make([]byte, 1024*1024)
readStart := 0
Expand Down Expand Up @@ -94,7 +95,7 @@ func r7(inputPath string, output io.Writer) error {
}
chunk = after[index:]

hashIndex := int(hash & uint64(len(items)-1))
hashIndex := int(hash & uint64(numBuckets-1))
for {
if items[hashIndex].key == nil {
// Found empty slot, add new item (copying key).
Expand All @@ -110,7 +111,7 @@ func r7(inputPath string, output io.Writer) error {
},
}
size++
if size > len(items)/2 {
if size > numBuckets/2 {
panic("too many items in hash table")
}
break
Expand All @@ -126,7 +127,7 @@ func r7(inputPath string, output io.Writer) error {
}
// Slot already holds another key, try next slot (linear probe).
hashIndex++
if hashIndex >= len(items) {
if hashIndex >= numBuckets {
hashIndex = 0
}
}
Expand Down
11 changes: 6 additions & 5 deletions r9.go
Expand Up @@ -79,8 +79,9 @@ func r9ProcessPart(inputPath string, fileOffset, fileSize int64, resultsCh chan
key []byte
stat *r9Stats
}
items := make([]item, 100000) // hash buckets, linearly probed
size := 0 // number of active items in items slice
const numBuckets = 1 << 17 // number of hash buckets (power of 2)
items := make([]item, numBuckets) // hash buckets, linearly probed
size := 0 // number of active items in items slice

buf := make([]byte, 1024*1024)
readStart := 0
Expand Down Expand Up @@ -145,7 +146,7 @@ func r9ProcessPart(inputPath string, fileOffset, fileSize int64, resultsCh chan
}
chunk = after[index:]

hashIndex := int(hash & uint64(len(items)-1))
hashIndex := int(hash & (numBuckets - 1))
for {
if items[hashIndex].key == nil {
// Found empty slot, add new item (copying key).
Expand All @@ -161,7 +162,7 @@ func r9ProcessPart(inputPath string, fileOffset, fileSize int64, resultsCh chan
},
}
size++
if size > len(items)/2 {
if size > numBuckets/2 {
panic("too many items in hash table")
}
break
Expand All @@ -177,7 +178,7 @@ func r9ProcessPart(inputPath string, fileOffset, fileSize int64, resultsCh chan
}
// Slot already holds another key, try next slot (linear probe).
hashIndex++
if hashIndex >= len(items) {
if hashIndex >= numBuckets {
hashIndex = 0
}
}
Expand Down

0 comments on commit e9fe448

Please sign in to comment.