forked from ironsweet/golucene
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reader.go
362 lines (320 loc) · 9.63 KB
/
reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
package index
import (
// "errors"
"errors"
"fmt"
. "github.com/gzg1984/golucene/core/codec/spi"
docu "github.com/gzg1984/golucene/core/document"
. "github.com/gzg1984/golucene/core/index/model"
"github.com/gzg1984/golucene/core/util"
"io"
"reflect"
"sync"
"sync/atomic"
)
// IndexReader is the contract shared by the index readers in this package.
// It combines a reference-counted lifecycle (Close, decRef, ensureOpen) with
// access to document counts, stored fields, reader contexts and per-term
// statistics.
type IndexReader interface {
	io.Closer

	// decRef releases one reference held on the reader and reports any
	// error raised while doing so.
	decRef() error
	// ensureOpen checks that the reader may still be used.
	ensureOpen()
	// registerParentReader records r as a parent of this reader.
	registerParentReader(r IndexReader)

	// NumDocs and MaxDoc report document counts. IndexReaderImpl treats
	// MaxDoc() - NumDocs() as the number of deleted documents.
	NumDocs() int
	MaxDoc() int

	// VisitDocument is an expert API that visits the fields of a stored
	// document, for custom processing/loading of each field. If you simply
	// want to load all fields, use Document. If you want to load a subset,
	// use a DocumentStoredFieldVisitor.
	VisitDocument(docID int, visitor StoredFieldVisitor) error

	// Document returns the stored fields of the docID-th Document in this
	// index. It is sugar for using a DocumentStoredFieldVisitor.
	//
	// NOTE: for performance reasons, this method does not check whether the
	// requested document is deleted, so asking for a deleted document may
	// yield unspecified results. Usually this is not required; however, you
	// can test whether the doc is deleted by checking the live-docs Bits.
	//
	// NOTE: only the content of a field is returned, and only if that field
	// was stored during indexing. Metadata like boost, omitNorm,
	// IndexOptions, tokenized, etc., are not preserved.
	//
	// A non-nil error reports a low-level I/O error.
	//
	// TODO: we need a separate StoredField, so that the Document returned
	// here contains that class, not model.IndexableField.
	Document(docID int) (doc *docu.Document, err error)

	// doClose performs the implementation-specific close work.
	doClose() error
	// Context returns the reader context associated with this reader.
	Context() IndexReaderContext
	// Leaves returns the atomic (leaf) reader contexts of this reader.
	Leaves() []*AtomicReaderContext

	// DocFreq returns the number of documents containing the term. It
	// returns 0 if the term or field does not exist. Deleted documents that
	// have not yet been merged away are not taken into account.
	DocFreq(*Term) (int, error)
}
// ReaderClosedListener is a custom listener that is invoked when an
// IndexReader is closed.
type ReaderClosedListener interface {
	// onClose receives the reader that has just been closed.
	onClose(IndexReader)
}
// IndexReaderImplSPI lists the operations a concrete reader has to supply so
// that IndexReaderImpl, which embeds this interface, can provide the rest of
// the IndexReader behaviour on top of them.
type IndexReaderImplSPI interface {
	// Document counts.
	NumDocs() int
	MaxDoc() int

	// Stored-field access and term statistics.
	VisitDocument(int, StoredFieldVisitor) error
	DocFreq(*Term) (int, error)

	// Lifecycle and context.
	doClose() error
	Context() IndexReaderContext
}
// IndexReaderImpl is the shared base of the readers in this file. It embeds
// the implementation-specific IndexReaderImplSPI and adds reference counting,
// close handling, parent-reader tracking and closed-listener notification.
//
// The struct contains locks and must not be copied; use it through a pointer.
type IndexReaderImpl struct {
	IndexReaderImplSPI

	// lock serialises Close.
	lock sync.Mutex
	// closed is set once the reader has been closed.
	closed bool
	// closedByChild is set when one of this reader's child readers closes.
	closedByChild bool
	// refCount is the number of outstanding references; it is modified
	// through sync/atomic.
	refCount int32 // synchronized

	// parentReaders is the set of readers registered as parents of this
	// one, guarded by parentReadersLock.
	parentReaders     map[IndexReader]bool
	parentReadersLock sync.RWMutex

	// readerClosedListeners is the set of listeners to notify on close,
	// guarded by readerClosedListenersLock.
	readerClosedListeners     map[ReaderClosedListener]bool
	readerClosedListenersLock sync.RWMutex
}
// newIndexReader builds the shared reader base around spi.
//
// The returned reader starts with a reference count of 1, held by the
// creator. Both bookkeeping maps are allocated up front so that registering
// a parent reader or a closed-listener never writes to a nil map.
func newIndexReader(spi IndexReaderImplSPI) *IndexReaderImpl {
	return &IndexReaderImpl{
		IndexReaderImplSPI:    spi,
		refCount:              1,
		parentReaders:         make(map[IndexReader]bool),
		readerClosedListeners: make(map[ReaderClosedListener]bool),
	}
}
// decRef releases one reference to the reader.
//
// When the count drops to zero the reader is marked closed and doClose is
// invoked; afterwards the parent readers are informed through
// reportCloseToParentReaders and the closed-listeners are notified with the
// error doClose produced. That error is also the return value. While other
// references remain, decRef does nothing else and returns nil.
func (r *IndexReaderImpl) decRef() error {
	// only check refcount here (don't call ensureOpen()), so we can
	// still close the reader if it was made invalid by a child:
	assert2(atomic.LoadInt32(&r.refCount) > 0, "this IndexReader is closed")

	rc := atomic.AddInt32(&r.refCount, -1)
	assert2(rc >= 0, "too many decRef calls: refCount is %v after decrement", rc)
	if rc != 0 {
		return nil
	}

	r.closed = true
	var err error
	// Deferred so that parents and listeners are still informed when
	// doClose panics.
	defer func() {
		// The closure reads err when it runs, i.e. after doClose has
		// returned, so listeners receive the real close error rather
		// than the zero value captured at defer time.
		defer func() { r.notifyReaderClosedListeners(err) }()
		r.reportCloseToParentReaders()
	}()
	err = r.doClose()
	return err
}
// ensureOpen panics when the reader can no longer be used: either its
// reference count has dropped to zero or below, or one of its child readers
// has been closed.
func (r *IndexReaderImpl) ensureOpen() {
	switch {
	case atomic.LoadInt32(&r.refCount) <= 0:
		panic("this IndexReader is closed")
	// the happens before rule on reading the refCount, which must be after
	// the fake write, ensures that we see the value:
	case r.closedByChild:
		panic("this IndexReader cannot be used anymore as one of its child readers was closed")
	}
}
// registerParentReader adds reader to the set of parents of r. The reader
// must still be open; ensureOpen panics otherwise. The set is updated under
// the parent-readers write lock.
func (r *IndexReaderImpl) registerParentReader(reader IndexReader) {
	r.ensureOpen()

	r.parentReadersLock.Lock()
	defer r.parentReadersLock.Unlock()

	r.parentReaders[reader] = true
}
// notifyReaderClosedListeners calls onClose on every registered listener
// while holding the listeners read lock.
//
// A panic raised by a listener is recovered, turned into an error and folded
// into err with mergeError, so the remaining listeners still run.
// NOTE(review): the merged error is only kept in the local parameter; it is
// not returned or otherwise surfaced to the caller.
func (r *IndexReaderImpl) notifyReaderClosedListeners(err error) {
	r.readerClosedListenersLock.RLock()
	defer r.readerClosedListenersLock.RUnlock()

	// notify invokes a single listener and absorbs any panic it raises.
	notify := func(l ReaderClosedListener) {
		defer func() {
			if cause := recover(); cause != nil {
				err = mergeError(err, errors.New(fmt.Sprintf("%v", cause)))
			}
		}()
		l.onClose(r)
	}

	for l := range r.readerClosedListeners {
		notify(l)
	}
}
// reportCloseToParentReaders flags every registered parent reader as closed
// by a child and then repeats the process for that parent's own parents.
// The parent set is walked under the parent-readers read lock.
//
// Only *IndexReaderImpl and *BaseCompositeReader parents are supported; any
// other type causes a panic that names the offending type.
func (r *IndexReaderImpl) reportCloseToParentReaders() {
	r.parentReadersLock.RLock()
	defer r.parentReadersLock.RUnlock()

	for parent := range r.parentReaders {
		switch p := parent.(type) {
		case *IndexReaderImpl:
			p.closedByChild = true
			// cross memory barrier by a fake write:
			// FIXME do we need it in Go?
			atomic.AddInt32(&p.refCount, 0)
			// recurse:
			p.reportCloseToParentReaders()
		case *BaseCompositeReader:
			p.closedByChild = true
			// cross memory barrier by a fake write:
			// FIXME do we need it in Go?
			atomic.AddInt32(&p.refCount, 0)
			// recurse:
			p.reportCloseToParentReaders()
		default:
			// Print the reflect.Type itself: Type.Name() is empty for
			// pointer types and cannot be called for a nil parent.
			panic(fmt.Sprintf("Unknown IndexReader type: %v", reflect.TypeOf(parent)))
		}
	}
}
// numDeletedDocs returns the number of deleted documents, computed as
// MaxDoc() minus NumDocs().
func (r *IndexReaderImpl) numDeletedDocs() int {
	total, live := r.MaxDoc(), r.NumDocs()
	return total - live
}
// Document loads the stored fields of document docID by running a
// DocumentStoredFieldVisitor through VisitDocument. It returns the document
// collected by the visitor, or a nil document together with the error
// reported by VisitDocument.
func (r *IndexReaderImpl) Document(docID int) (doc *docu.Document, err error) {
	collector := docu.NewDocumentStoredFieldVisitor()
	err = r.VisitDocument(docID, collector)
	if err != nil {
		return nil, err
	}
	return collector.Document(), nil
}
// hasDeletions reports whether any documents have been deleted, i.e. whether
// numDeletedDocs is positive. Implementers should consider providing their
// own version if MaxDoc or NumDocs are not constant-time operations.
func (r *IndexReaderImpl) hasDeletions() bool {
	deleted := r.numDeletedDocs()
	return deleted > 0
}
// Close releases the reference held by the reader's owner. It runs under the
// reader's mutex and is a no-op returning nil once the reader is marked
// closed. If decRef fails, its error is returned and the closed flag is not
// set by this method.
func (r *IndexReaderImpl) Close() error {
	r.lock.Lock()
	defer r.lock.Unlock()

	if r.closed {
		return nil
	}
	err := r.decRef()
	if err == nil {
		r.closed = true
	}
	return err
}
// Leaves returns the leaf contexts of this reader by delegating to the
// Leaves method of the reader's context.
func (r *IndexReaderImpl) Leaves() []*AtomicReaderContext {
	ctx := r.Context()
	return ctx.Leaves()
}
// IndexReaderContext describes the position of an IndexReader inside a tree
// of readers.
type IndexReaderContext interface {
	// Reader returns the reader this context belongs to.
	Reader() IndexReader
	// Parent returns the enclosing composite context; IndexReaderContextImpl
	// treats a nil parent as marking a top-level context.
	Parent() *CompositeReaderContext
	// Leaves returns the atomic contexts reachable from this context.
	Leaves() []*AtomicReaderContext
	// Children returns the direct child contexts; AtomicReaderContext
	// returns nil.
	Children() []IndexReaderContext
}
// IndexReaderContextImpl holds the state common to reader contexts: the
// link to the parent context and this context's position within it.
type IndexReaderContextImpl struct {
	// parent is the enclosing composite context, nil for a top-level one.
	parent *CompositeReaderContext
	// isTopLevel is true exactly when parent is nil at construction time.
	isTopLevel bool
	// docBaseInParent is presumably the document offset of this reader
	// relative to its parent — TODO confirm against callers.
	docBaseInParent int
	// ordInParent is presumably the index of this reader among its
	// parent's children — TODO confirm against callers.
	ordInParent int
}
// newIndexReaderContext creates the shared context state for a reader that
// sits under parent at the given ordinal and document base. A nil parent
// marks the context as top-level.
func newIndexReaderContext(parent *CompositeReaderContext, ordInParent, docBaseInParent int) *IndexReaderContextImpl {
	ctx := new(IndexReaderContextImpl)
	ctx.parent = parent
	ctx.isTopLevel = parent == nil
	ctx.docBaseInParent = docBaseInParent
	ctx.ordInParent = ordInParent
	return ctx
}
// Parent returns the composite context that encloses ctx; it is nil when the
// context was created without a parent.
func (ctx *IndexReaderContextImpl) Parent() *CompositeReaderContext {
	return ctx.parent
}
// ARFieldsReader groups the field-level accessors of an atomic reader.
type ARFieldsReader interface {
	// Terms returns the terms of the given field. AtomicReaderImpl returns
	// nil when no fields are available.
	Terms(field string) Terms
	// Fields returns the fields of the reader; callers in this file treat a
	// nil result as "no fields".
	Fields() Fields
	// LiveDocs returns the Bits describing live documents.
	// NOTE(review): nil handling is not visible here — confirm with implementers.
	LiveDocs() util.Bits
	// NormValues returns the NumericDocValues representing norms for this
	// field, or nil if no NumericDocValues were indexed. The returned
	// instance should only be used by a single thread.
	NormValues(field string) (ndv NumericDocValues, err error)
}
// AtomicReader is an IndexReader that additionally exposes the field-level
// accessors of ARFieldsReader.
type AtomicReader interface {
	IndexReader
	ARFieldsReader
}
// AtomicReaderImplSPI is what a concrete atomic reader must provide to
// newAtomicReader: the base reader operations of IndexReaderImplSPI plus the
// field accessors of ARFieldsReader.
type AtomicReaderImplSPI interface {
	IndexReaderImplSPI
	ARFieldsReader
}
// AtomicReaderImpl is the base implementation of AtomicReader. It combines
// the shared reader base with the field accessors supplied by the SPI and
// owns the context created for it in newAtomicReader.
type AtomicReaderImpl struct {
	*IndexReaderImpl
	ARFieldsReader

	// readerContext is the context returned by Context.
	readerContext *AtomicReaderContext
}
// newAtomicReader wraps spi in an AtomicReaderImpl: spi backs both the
// embedded reader base and the field accessors, and a context is created
// for the new reader via newAtomicReaderContextFromReader.
func newAtomicReader(spi AtomicReaderImplSPI) *AtomicReaderImpl {
	reader := new(AtomicReaderImpl)
	reader.IndexReaderImpl = newIndexReader(spi)
	reader.ARFieldsReader = spi
	// The context refers back to the reader, so it is built last.
	reader.readerContext = newAtomicReaderContextFromReader(reader)
	return reader
}
// Context returns the reader's own AtomicReaderContext. The reader must
// still be open; ensureOpen panics otherwise.
func (r *AtomicReaderImpl) Context() IndexReaderContext {
	r.ensureOpen()
	return r.readerContext
}
// DocFreq returns the document frequency of term as reported by the terms
// enumeration of term.Field. It returns 0 with a nil error when the reader
// has no fields, the field has no terms, or the term is not found, and it
// forwards any error from seeking to the term.
func (r *AtomicReaderImpl) DocFreq(term *Term) (int, error) {
	fields := r.Fields()
	if fields == nil {
		return 0, nil
	}
	terms := fields.Terms(term.Field)
	if terms == nil {
		return 0, nil
	}

	termsEnum := terms.Iterator(nil)
	found, err := termsEnum.SeekExact(term.Bytes)
	if err != nil {
		return 0, err
	}
	if !found {
		return 0, nil
	}
	return termsEnum.DocFreq()
}
// TotalTermFreq is a placeholder: it is not implemented yet and always
// panics.
func (r *AtomicReaderImpl) TotalTermFreq(term *Term) (n int64, err error) {
	panic("not implemented yet")
}
// SumDocFreq is a placeholder: it is not implemented yet and always panics.
func (r *AtomicReaderImpl) SumDocFreq(field string) (n int64, err error) {
	panic("not implemented yet")
}
// DocCount is a placeholder: it is not implemented yet and always panics.
func (r *AtomicReaderImpl) DocCount(field string) (n int, err error) {
	panic("not implemented yet")
}
// SumTotalTermFreq is a placeholder: it is not implemented yet and always
// panics.
func (r *AtomicReaderImpl) SumTotalTermFreq(field string) (n int64, err error) {
	panic("not implemented yet")
}
// Terms returns the terms of field as provided by the reader's Fields, or
// nil when the reader has no fields.
func (r *AtomicReaderImpl) Terms(field string) Terms {
	if all := r.Fields(); all != nil {
		return all.Terms(field)
	}
	return nil
}
// AtomicReaderContext is the IndexReaderContext of an AtomicReader.
type AtomicReaderContext struct {
	*IndexReaderContextImpl

	// Ord is set from the leafOrd argument of newAtomicReaderContext.
	Ord int
	// DocBase is set from the leafDocBase argument of
	// newAtomicReaderContext.
	DocBase int

	// reader is the atomic reader this context describes.
	reader AtomicReader
	// leaves holds the context itself when it is top-level and is nil
	// otherwise.
	leaves []*AtomicReaderContext
}
// String renders the context for debugging: the embedded base context, the
// leaf ordinal, the leaf document base and the reader.
func (ctx *AtomicReaderContext) String() string {
	const layout = "AtomicReaderContext{%v ord=%v docBase=%v %v}"
	return fmt.Sprintf(layout, ctx.IndexReaderContextImpl, ctx.Ord, ctx.DocBase, ctx.reader)
}
// newAtomicReaderContextFromReader creates a top-level context for r: it has
// no parent and all ordinals and document bases are zero.
func newAtomicReaderContextFromReader(r AtomicReader) *AtomicReaderContext {
	var noParent *CompositeReaderContext
	return newAtomicReaderContext(noParent, r, 0, 0, 0, 0)
}
// newAtomicReaderContext creates the context of reader below parent.
//
// ord and docBase are passed to the embedded base context; leafOrd and
// leafDocBase become the exported Ord and DocBase. A context without a
// parent is top-level and lists itself as its only leaf.
func newAtomicReaderContext(parent *CompositeReaderContext, reader AtomicReader, ord, docBase, leafOrd, leafDocBase int) *AtomicReaderContext {
	ctx := &AtomicReaderContext{
		IndexReaderContextImpl: newIndexReaderContext(parent, ord, docBase),
		Ord:                    leafOrd,
		DocBase:                leafDocBase,
		reader:                 reader,
	}
	if ctx.isTopLevel {
		ctx.leaves = []*AtomicReaderContext{ctx}
	}
	return ctx
}
// Leaves returns the leaves of a top-level context, which for an atomic
// context is the context itself. It panics when called on a context that
// has a parent.
func (ctx *AtomicReaderContext) Leaves() []*AtomicReaderContext {
	if ctx.isTopLevel {
		// assert leaves != null
		return ctx.leaves
	}
	panic("This is not a top-level context.")
}
// Children always returns nil: an atomic context has no child contexts.
func (ctx *AtomicReaderContext) Children() []IndexReaderContext {
	return nil
}
// Reader returns the atomic reader this context was created for.
func (ctx *AtomicReaderContext) Reader() IndexReader {
	return ctx.reader
}