/
searcher.go
196 lines (176 loc) · 5.69 KB
/
searcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
package chat
import (
"context"
"regexp"
"github.com/keybase/client/go/chat/globals"
"github.com/keybase/client/go/chat/utils"
"github.com/keybase/client/go/protocol/chat1"
"github.com/keybase/client/go/protocol/gregor1"
)
// Paging and safety limits used by Searcher.
const (
	// defaultPageSize is the page size NewSearcher installs on a Searcher;
	// SearchRegexp requests this many messages per pull.
	defaultPageSize = 300

	// MaxAllowedSearchHits is the ceiling SearchRegexp applies to the
	// caller-supplied SearchOpts.MaxHits.
	MaxAllowedSearchHits = 10000

	// MaxAllowedSearchMessages is the ceiling SearchRegexp applies to the
	// caller-supplied SearchOpts.MaxMessages.
	MaxAllowedSearchMessages = 100000
)
// Searcher runs regular-expression searches over the text messages of a
// conversation, pulling the thread from the conversation source one page at
// a time (see SearchRegexp).
type Searcher struct {
// Contextified gives access to the chat globals via s.G().
globals.Contextified
// DebugLabeler is created with the "searcher" label in NewSearcher.
utils.DebugLabeler
// pageSize is the number of messages requested per page; it also bounds
// how much before/after context a single search hit may carry.
pageSize int
}
// NewSearcher returns a Searcher bound to the global context g. The searcher
// logs under the "searcher" debug label and pages through threads
// defaultPageSize messages at a time.
func NewSearcher(g *globals.Context) *Searcher {
	searcher := &Searcher{pageSize: defaultPageSize}
	searcher.DebugLabeler = utils.NewDebugLabeler(g.GetLog(), "searcher", false)
	searcher.Contextified = globals.NewContextified(g)
	return searcher
}
// SearchRegexp searches the text messages of conversation convID for bodies
// matching re and returns one ChatSearchHit per matching message.
//
// The thread is pulled one page (s.pageSize messages) at a time. Messages
// within a page, and pages relative to each other, are ordered last to first,
// so "before" context lives at higher indexes / on the next page and "after"
// context lives at lower indexes / on the previous page.
//
// Parameters:
//   - ctx: passed to the conversation source; if it is cancelled while a hit
//     is waiting to be delivered on uiCh the search stops with ctx.Err().
//   - uiCh: optional channel on which hits are streamed as they are found.
//     When non-nil it is closed before SearchRegexp returns, on success and
//     on error alike.
//   - convID: the conversation to search.
//   - re: the compiled pattern applied to each text message body.
//   - opts: SentBy restricts hits to one sender (empty means any sender);
//     MaxHits and MaxMessages bound the search and are capped at
//     MaxAllowedSearchHits / MaxAllowedSearchMessages; BeforeContext and
//     AfterContext are the number of surrounding messages attached to each
//     hit, clamped to the range [0, pageSize-1].
//
// Returns the hits in the order they were found, or a nil slice and the error
// if pulling the thread fails or ctx is cancelled.
func (s *Searcher) SearchRegexp(ctx context.Context, uiCh chan chat1.ChatSearchHit, convID chat1.ConversationID,
	re *regexp.Regexp, opts chat1.SearchOpts) (hits []chat1.ChatSearchHit, err error) {
	// The success path has always closed uiCh. Close it on every exit so a
	// consumer ranging over the channel is not left blocked when we fail.
	defer func() {
		if uiCh != nil {
			close(uiCh)
		}
	}()

	uid := gregor1.UID(s.G().Env.GetUID().ToBytes())
	pagination := &chat1.Pagination{Num: s.pageSize}

	sentBy := opts.SentBy
	maxHits := opts.MaxHits
	maxMessages := opts.MaxMessages
	beforeContext := opts.BeforeContext
	afterContext := opts.AfterContext

	// Context cannot exceed the page size, and a negative amount of context
	// would produce inverted slice bounds below.
	if beforeContext >= s.pageSize {
		beforeContext = s.pageSize - 1
	}
	if beforeContext < 0 {
		beforeContext = 0
	}
	if afterContext >= s.pageSize {
		afterContext = s.pageSize - 1
	}
	if afterContext < 0 {
		afterContext = 0
	}

	if maxHits > MaxAllowedSearchHits {
		maxHits = MaxAllowedSearchHits
	}
	if maxMessages > MaxAllowedSearchMessages {
		maxMessages = MaxAllowedSearchMessages
	}

	// If we have to gather search result context around a pagination boundary,
	// we may have to fetch the next page of the thread ahead of time.
	var prevPage, curPage, nextPage *chat1.ThreadView

	// getNextPage pulls the next page of text messages and advances the
	// shared pagination cursor.
	getNextPage := func() (*chat1.ThreadView, error) {
		thread, err := s.G().ConvSource.Pull(ctx, convID, uid,
			chat1.GetThreadReason_SEARCHER,
			&chat1.GetThreadQuery{
				MessageTypes: []chat1.MessageType{chat1.MessageType_TEXT},
			}, pagination)
		if err != nil {
			return nil, err
		}
		// Filter out invalid/exploded messages so our search context is
		// correct.
		filteredMsgs := make([]chat1.MessageUnboxed, 0, len(thread.Messages))
		for _, msg := range thread.Messages {
			if msg.IsValid() && msg.GetMessageType() == chat1.MessageType_TEXT && !msg.Valid().MessageBody.IsNil() {
				filteredMsgs = append(filteredMsgs, msg)
			}
		}
		thread.Messages = filteredMsgs
		pagination = thread.Pagination
		pagination.Num = s.pageSize
		// NOTE(review): Previous is cleared presumably so the following pull
		// only follows the Next cursor; confirm against ConvSource.Pull.
		pagination.Previous = nil
		return &thread, nil
	}

	// getBeforeMsgs returns the search context before the hit at position i
	// in cur.Messages, fetching a new page when the context crosses a
	// pagination boundary. The (possibly newly fetched) next page is returned
	// so the caller can reuse it instead of pulling it again.
	getBeforeMsgs := func(i int, cur, next *chat1.ThreadView) (*chat1.ThreadView, []chat1.MessageUnboxed, error) {
		// Context is contained entirely in this page of the thread.
		if i+beforeContext < len(cur.Messages) {
			return next, cur.Messages[i+1 : i+beforeContext+1], nil
		}
		// Everything after the hit index on the current page is context.
		older := cur.Messages[i+1:]
		if next == nil {
			if pagination.Last {
				// The final page has already been pulled; there is nothing
				// further back to fetch.
				return nil, older, nil
			}
			page, err := getNextPage()
			if err != nil {
				return nil, nil, err
			}
			next = page
		}
		// Take the remaining context from the head of the next page.
		remaining := beforeContext - len(older)
		if remaining > len(next.Messages) {
			remaining = len(next.Messages)
		}
		// Build a fresh slice: appending onto a sub-slice of either page
		// would overwrite that page's backing array. The current page's
		// messages come first to keep the last-to-first ordering.
		msgs := make([]chat1.MessageUnboxed, 0, len(older)+remaining)
		msgs = append(msgs, older...)
		msgs = append(msgs, next.Messages[:remaining]...)
		return next, msgs, nil
	}

	// getAfterMsgs returns the search context after the hit at index i in
	// cur.Messages, possibly using prev if we are at a pagination boundary
	// (since msgs are ordered last to first).
	getAfterMsgs := func(i int, prev, cur *chat1.ThreadView) []chat1.MessageUnboxed {
		// Return context from the current page only.
		if afterContext <= i {
			return cur.Messages[i-afterContext : i]
		}
		newer := cur.Messages[:i]
		if prev == nil {
			return newer
		}
		// Take the remaining context from the tail of the previous page.
		start := len(prev.Messages) - (afterContext - len(newer))
		if start < 0 {
			start = 0
		}
		// Fresh slice for the same aliasing reason as in getBeforeMsgs; the
		// previous page's messages come first to keep last-to-first order.
		msgs := make([]chat1.MessageUnboxed, 0, len(prev.Messages)-start+len(newer))
		msgs = append(msgs, prev.Messages[start:]...)
		msgs = append(msgs, newer...)
		return msgs
	}

	// getUIMsgs converts msgs for presentation, reversing them so they are
	// ordered ascending by ID.
	getUIMsgs := func(msgs []chat1.MessageUnboxed) (uiMsgs []chat1.UIMessage) {
		for i := len(msgs) - 1; i >= 0; i-- {
			uiMsgs = append(uiMsgs, utils.PresentMessageUnboxed(ctx, s.G(), msgs[i], uid, convID))
		}
		return uiMsgs
	}

	numHits := 0
	numMessages := 0
	// A pre-fetched page must still be searched even if pulling it moved the
	// pagination cursor to the end of the thread.
	for (nextPage != nil || !pagination.Last) && numHits < maxHits && numMessages < maxMessages {
		prevPage = curPage
		if nextPage != nil {
			// We pre-fetched the next page when retrieving context.
			curPage, nextPage = nextPage, nil
		} else {
			curPage, err = getNextPage()
			if err != nil {
				return nil, err
			}
		}

		for i, msg := range curPage.Messages {
			// Checked at the top so that messages skipped by the sender
			// filter cannot carry us past the limits.
			if numHits >= maxHits || numMessages >= maxMessages {
				break
			}
			numMessages++
			if sentBy != "" && msg.Valid().SenderUsername != sentBy {
				continue
			}
			msgText := msg.Valid().MessageBody.Text().Body
			matches := re.FindAllString(msgText, -1)
			if matches == nil {
				continue
			}
			numHits++

			afterMsgs := getAfterMsgs(i, prevPage, curPage)
			var beforeMsgs []chat1.MessageUnboxed
			nextPage, beforeMsgs, err = getBeforeMsgs(i, curPage, nextPage)
			if err != nil {
				return nil, err
			}

			searchHit := chat1.ChatSearchHit{
				BeforeMessages: getUIMsgs(beforeMsgs),
				HitMessage:     utils.PresentMessageUnboxed(ctx, s.G(), msg, uid, convID),
				AfterMessages:  getUIMsgs(afterMsgs),
				Matches:        matches,
			}
			if uiCh != nil {
				// Stream search hits back to the UI channel, but do not
				// block forever if the caller has gone away.
				select {
				case uiCh <- searchHit:
				case <-ctx.Done():
					return nil, ctx.Err()
				}
			}
			hits = append(hits, searchHit)
		}
	}
	return hits, nil
}