/
search.go
224 lines (200 loc) · 7.17 KB
/
search.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
// Copyright 2018 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package model
import (
"context"
"go.chromium.org/gae/service/datastore"
"github.com/TriggerMail/luci-go/common/data/stringset"
"github.com/TriggerMail/luci-go/common/errors"
"github.com/TriggerMail/luci-go/common/retry/transient"
api "github.com/TriggerMail/luci-go/cipd/api/cipd/v1"
"github.com/TriggerMail/luci-go/cipd/common"
)
// SearchInstances lists instances of a package with all given tags attached.
//
// Only does a query over Instances entities. Doesn't check whether the Package
// entity exists. Returns up to pageSize entities, plus non-nil cursor (if
// there are more results). 'pageSize' must be positive.
func SearchInstances(c context.Context, pkg string, tags []*api.Tag, pageSize int32, cursor datastore.Cursor) (out []*Instance, nextCur datastore.Cursor, err error) {
switch {
case len(tags) == 0:
panic("tags must not be empty")
case pageSize <= 0:
panic("pageSize must be positive")
}
// Due to the way our entities organized, we can't efficiently query by all
// tags at once. Instead we query by the first tag only, and then filter the
// result to keep only instances that have the rest of tags attached. We do it
// chunk by chunk, until we assemble the full page.
seen := stringset.New(len(tags))
// The principal tag we query by.
mainTag := common.JoinInstanceTag(tags[0])
seen.Add(mainTag)
// Rest of the tags, dedupped, in the original order (the order influences the
// query efficiency, see below).
auxTags := make([]*api.Tag, 0, len(tags)-1)
for i := 1; i < len(tags); i++ {
if t := common.JoinInstanceTag(tags[i]); !seen.Has(t) {
auxTags = append(auxTags, tags[i])
seen.Add(t)
}
}
for int32(len(out)) < pageSize {
// Iterate over query on mainTag. This returns keys of instances that have
// mainTag attached.
var page []*datastore.Key
page, cursor, err = queryByTag(c, pkg, mainTag, cursor, pageSize-int32(len(out)))
if err != nil {
return nil, nil, err
}
// Filter out instances that don't have any of remaining tags[1:] tags
// attached.
//
// Note that we can potentially filter by all tags at once here, hitting
// len(page)*(len(tags)-1) entities in parallel. Instead we assume each
// additional tag noticeably limits number of results, so we sequentially
// filter the result tag by tag, eventually making fewer requests total.
//
// Assuming N = len(page), T = len(tags)-1, and k is a portion of a page
// left after filtering by one tag (e.g. k = 0.7 if 7 out of 10 results
// survive filtering by a tag):
//
// For parallel filtering we hit the following number of entities:
// N * T.
//
// For sequential filtering we hit:
// N * (1 + k + k^2 + ... + k^(T-1)) = N * (1.0 - k^(T-1)) / (1.0 - k).
//
// For N = 100, T = 4, k = 0.7 this results in:
// 400 instances hit via 1 RPC vs ~220 instances hit via 4 RPCs.
//
// The real life difference of course dramatically depends on k. But in
// general we expect the sequential strategy will be cheaper overall (in
// terms of combined Datastore + CPU cost). This hasn't been confirmed by
// hard numbers though.
for _, tag := range auxTags {
if len(page) == 0 {
break
}
page, err = filterByTag(c, page, tag)
if err != nil {
return nil, nil, err
}
}
// Fetch actual instance bodies. It is possible (though highly improbable),
// that some instances are gone already, so len(instances) may be less than
// len(page).
instances, err := fetchExistingInstances(c, page)
if err != nil {
return nil, nil, err
}
out = append(out, instances...)
if cursor == nil {
break // listed everything we could
}
}
nextCur = cursor
return
}
// queryByTag returns keys of Instances that have the given tag attached.
//
// Returns up to 'pageSize' of results, along with a cursor to continue the
// query or nil if it was the end of it.
func queryByTag(c context.Context, pkg, tag string, cursor datastore.Cursor, pageSize int32) (
out []*datastore.Key,
next datastore.Cursor,
err error) {
// TODO(vadimsh): 'registered_ts' here is when the tag was attached. The
// callers likely expect results ordered by instance registration time. This
// is not possible currently. We'll need to copy Instance.registered_ts into
// InstanceTag entities, so we can order by instance registration time.
// TODO(vadimsh): Should we exclude unprocessed instances from the result?
q := datastore.NewQuery("InstanceTag").
Ancestor(PackageKey(c, pkg)).
Eq("tag", tag).
Order("-registered_ts").
Limit(pageSize).
KeysOnly(true)
if cursor != nil {
q = q.Start(cursor)
}
err = datastore.Run(c, q, func(k *datastore.Key, cb datastore.CursorCB) error {
out = append(out, k.Parent())
if len(out) >= int(pageSize) {
if next, err = cb(); err != nil {
return err
}
return datastore.Stop
}
return nil
})
if err != nil {
return nil, nil, errors.Annotate(err, "failed to query by tag %q", tag).Tag(transient.Tag).Err()
}
return
}
// filterByTag takes keys of Instance entities and keeps only ones that have the
// given tag attached (preserving the order).
//
// Fetches the tags to verify their Tag fields really match 'tag' as a safeguard
// against malicious SHA1 collision on TagID.
func filterByTag(c context.Context, page []*datastore.Key, tag *api.Tag) ([]*datastore.Key, error) {
tagID := TagID(tag)
tagEnts := make([]*Tag, len(page))
for i, inst := range page {
tagEnts[i] = &Tag{
ID: tagID,
Instance: inst,
}
}
existing, _, err := fetchTags(c, tagEnts, func(int) *api.Tag { return tag })
if err != nil {
return nil, errors.Annotate(err, "failed by filter by tag %q", common.JoinInstanceTag(tag)).Err()
}
filtered := page[:0]
for _, tagEnt := range existing {
filtered = append(filtered, tagEnt.Instance)
}
return filtered, nil
}
// fetchExistingInstances fetches Instance entities given their keys.
//
// Skips missing ones.
func fetchExistingInstances(c context.Context, keys []*datastore.Key) ([]*Instance, error) {
instances := make([]*Instance, len(keys))
for i, k := range keys {
instances[i] = &Instance{
InstanceID: k.StringID(),
Package: k.Parent(),
}
}
err := datastore.Get(c, instances)
if err == nil {
return instances, nil
}
merr, ok := err.(errors.MultiError)
if !ok {
return nil, errors.Annotate(err, "failed to fetch instances").Tag(transient.Tag).Err()
}
existing := instances[:0]
for i, inst := range instances {
switch err := merr[i]; {
case err == nil:
existing = append(existing, inst)
case err != datastore.ErrNoSuchEntity:
return nil, errors.Annotate(err, "failed to fetch instance %q", inst.InstanceID).Tag(transient.Tag).Err()
}
}
return existing, nil
}