-
Notifications
You must be signed in to change notification settings - Fork 8
/
queryhash.go
122 lines (104 loc) · 2.79 KB
/
queryhash.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// Copyright Suneido Software Corp. All rights reserved.
// Governed by the MIT license found in the LICENSE file.
package builtin
import (
"fmt"
"hash/adler32"
"slices"
. "github.com/apmckinlay/gsuneido/core"
"github.com/apmckinlay/gsuneido/util/generic/hmap"
"github.com/apmckinlay/gsuneido/util/generic/slc"
"github.com/apmckinlay/gsuneido/util/hacks"
)
// QueryHash is passed to builtin together with its parameter spec string:
// a query argument and a details argument defaulting to false.
// NOTE(review): builtin is defined elsewhere in the package; presumably this
// registers QueryHash as a built-in function - confirm.
var _ = builtin(QueryHash, "(query, details=false)")
// rowHash pairs a query result row with its hash.
// QueryHash uses it as the key type of the hash map that detects
// duplicate rows.
type rowHash struct {
// row is the query result row.
row Row
// hash is the value queryHasher.Row returned for row.
hash uint32
}
// QueryHash runs the query given in args[0] and returns the hash that a
// queryHasher accumulates over the result's columns and rows.
//
// Three CHECKQUERY SUPPRESS comments are appended to the query text before
// it is executed. args[1] (details) selects the form of the result: when
// true a descriptive string is returned, otherwise just the hash value.
//
// Every row is also recorded in a hash map keyed by its hash and raw field
// values; if a row equal to an earlier one is seen, QueryHash panics with
// "QueryHash: duplicate row".
func QueryHash(th *Thread, args []Value) Value {
	const suppress = `
/* CHECKQUERY SUPPRESS: PROJECT NOT UNIQUE */
/* CHECKQUERY SUPPRESS: UNION NOT DISJOINT */
/* CHECKQUERY SUPPRESS: JOIN MANY TO MANY */`
	query := ToStr(args[0]) + suppress
	details := ToBool(args[1])

	tran := th.Dbms().Transaction(false)
	defer tran.Complete()

	q := tran.Query(query, nil)
	qh := NewQueryHasher(q.Header())

	// The set of rows seen so far. Rows are equal only when both their
	// hashes and their raw field values match.
	seen := hmap.NewHmapFuncs[rowHash, struct{}](
		func(r rowHash) uint32 { return r.hash },
		func(a, b rowHash) bool {
			return a.hash == b.hash && equalRow(a.row, b.row, qh.hdr, qh.fields)
		})

	for {
		row, _ := q.Get(th, Next)
		if row == nil {
			break
		}
		key := rowHash{row: row, hash: qh.Row(row)}
		if _, _, dup := seen.GetPut(key, struct{}{}); dup {
			panic("QueryHash: duplicate row")
		}
	}
	return qh.Result(details)
}
// equalRow reports whether rows x and y have the same raw value, as read
// through hdr, for every column named in cols.
func equalRow(x, y Row, hdr *Header, cols []string) bool {
	differs := func(col string) bool {
		return x.GetRaw(hdr, col) != y.GetRaw(hdr, col)
	}
	// ContainsFunc stops at the first differing column.
	return !slices.ContainsFunc(cols, differs)
}
//-------------------------------------------------------------------
// queryHasher accumulates a hash over the columns and rows of a query
// result. It is created by NewQueryHasher, fed rows through Row, and read
// out with Result.
type queryHasher struct {
// hdr is the query header used to read raw field values from rows.
hdr *Header
// fields is the sorted list of field names hashed for each row.
fields []string
// ncols is the number of columns in the header.
ncols int
// colsHash is the hash of the sorted column names.
colsHash uint32
// nrows counts the rows passed to Row.
nrows int
// hash is the running result: it starts at colsHash and has each row's
// hash added to it.
hash uint32
}
// NewQueryHasher returns a queryHasher for results described by hdr.
//
// The per-row field list is hdr.GetFields() passed through
// slc.Without(..., "-") and then sorted. The column hash is computed from
// a sorted copy of hdr.Columns, so it does not depend on column order.
// The running hash starts out equal to the column hash.
func NewQueryHasher(hdr *Header) *queryHasher {
	fields := slc.Without(hdr.GetFields(), "-")
	slices.Sort(fields)

	// Sort a clone so hdr.Columns itself is left untouched.
	sortedCols := slices.Clone(hdr.Columns)
	slices.Sort(sortedCols)

	colsHash := uint32(31)
	for i := range sortedCols {
		colsHash = colsHash*31 + adler32.Checksum(hacks.Stobs(sortedCols[i]))
	}

	return &queryHasher{
		hdr:      hdr,
		fields:   fields,
		ncols:    len(sortedCols),
		colsHash: colsHash,
		hash:     colsHash,
	}
}
// Row hashes the raw values of row's fields, taken in the order of
// qh.fields, folds that hash into the running total, increments the row
// count, and returns the hash of this row alone.
func (qh *queryHasher) Row(row Row) uint32 {
	var h uint32
	for i := range qh.fields {
		h = 31*h + hashPacked(row.GetRaw(qh.hdr, qh.fields[i]))
	}
	//TODO order sensitive if sorted
	qh.nrows++
	// Row hashes are combined by addition so the total does not depend on
	// the order in which rows arrive.
	qh.hash += h
	return h
}
// hashPacked returns a 32-bit hash of the packed value p.
// A non-empty value whose first byte is >= PackObject is hashed with
// hashObject; anything else, including the empty string, gets an Adler-32
// checksum of its bytes.
func hashPacked(p string) uint32 {
	isObject := len(p) > 0 && p[0] >= PackObject
	if !isObject {
		return adler32.Checksum(hacks.Stobs(p))
	}
	return hashObject(p)
}
// hashObject returns the sum of the bytes of p, wrapping at 32 bits.
// A plain sum is used so the result is insensitive to member order.
func hashObject(p string) uint32 {
	var sum uint32
	for _, b := range []byte(p) {
		sum += uint32(b)
	}
	return sum
}
// Result returns the accumulated hash.
// With details false it is the running hash as an integer value. With
// details true it is a string listing the row count, the running hash,
// the column count and the column hash.
func (qh *queryHasher) Result(details bool) Value {
	if !details {
		return IntVal(int(qh.hash))
	}
	summary := fmt.Sprintln("nrows", qh.nrows, "hash", qh.hash,
		"ncols", qh.ncols, "hash", qh.colsHash)
	return SuStr(summary)
}