Lucene dropped support for maxscore.
coady committed Jun 16, 2019
1 parent 6e877bd commit 9d6ef1b
Showing 5 changed files with 42 additions and 44 deletions.
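
In short: the maximum score is no longer requested from Lucene at search time; Hits.maxscore is now a property computed from the hits actually retrieved. A minimal sketch of the new usage, assuming an engine.Indexer already populated with a 'text' field as in the test suite below (the query string is illustrative):

    import math

    hits = indexer.search('people', count=5, field='text')
    # maxscore is now derived from the retrieved scoredocs,
    # not necessarily the maximum over all matching documents.
    assert hits.maxscore == max(hits.scores)    # by definition of the new property
    assert hits.maxscore == next(hits.scores)   # the top hit's score when sorted by relevance

    empty = indexer.search('people', field='text', timeout=-1)  # timed-out search returns no hits
    assert math.isnan(empty.maxscore)                           # an empty Hits object falls back to NaN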
29 changes: 16 additions & 13 deletions lupyne/engine/documents.py
@@ -276,17 +276,16 @@ class Hits(object):
"""Search results: lazily evaluated and memory efficient.
Provides a read-only sequence interface to hit objects.
.. versionchanged:: 2.3 maxscore option removed; computed property instead
:param searcher: `IndexSearcher`_ which can retrieve documents
:param scoredocs: lucene ScoreDocs
:param count: total number of hits
:param maxscore: maximum score
:param fields: optional field selectors
"""
def __init__(self, searcher, scoredocs, count=None, maxscore=None, fields=None):
def __init__(self, searcher, scoredocs, count=None, fields=None):
self.searcher, self.scoredocs = searcher, scoredocs
self.count, self.maxscore = count, maxscore
self.fields = fields
self.count, self.fields = count, fields

def select(self, *fields):
"""Only load selected fields."""
@@ -298,7 +297,7 @@ def __len__(self):
def __getitem__(self, index):
if isinstance(index, slice):
scoredocs = list(map(self.scoredocs.__getitem__, range(*index.indices(len(self)))))
return type(self)(self.searcher, scoredocs, self.count, self.maxscore, self.fields)
return type(self)(self.searcher, scoredocs, self.count, self.fields)
scoredoc = self.scoredocs[index]
keys = search.FieldDoc.cast_(scoredoc).fields if search.FieldDoc.instance_(scoredoc) else ()
doc = self.searcher.doc(scoredoc.doc, *([self.fields] * bool(self.fields)))
@@ -312,6 +311,11 @@ def ids(self):
def scores(self):
return map(operator.attrgetter('score'), self.scoredocs)

@property
def maxscore(self):
"""max score of present hits; not necessarily of all matches"""
return max(self.scores) if self else float('nan')

def items(self):
"""Generate zipped ids and scores."""
return map(operator.attrgetter('doc', 'score'), self.scoredocs)
@@ -346,9 +350,9 @@ def groupby(self, func, count=None, docs=None):
group.scoredocs.append(scoredoc)
groups = list(groups.values())
for group in groups:
group.count, group.maxscore = len(group), max(group.scores)
group.count = len(group)
group.scoredocs = group.scoredocs[:docs]
return Groups(self.searcher, groups[:count], len(groups), self.maxscore, self.fields)
return Groups(self.searcher, groups[:count], len(groups), self.fields)

def filter(self, func):
"""Return `Hits`_ filtered by function applied to doc ids."""
@@ -358,25 +362,24 @@ def filter(self, func):
def sorted(self, key, reverse=False):
"""Return `Hits`_ sorted by key function applied to doc ids."""
scoredocs = sorted(self.scoredocs, key=lambda scoredoc: key(scoredoc.doc), reverse=reverse)
return type(self)(self.searcher, scoredocs, self.count, self.maxscore, self.fields)
return type(self)(self.searcher, scoredocs, self.count, self.fields)


class Groups(object):
"""Sequence of grouped `Hits`_."""
select = Hits.__dict__['select']

def __init__(self, searcher, groupdocs, count=None, maxscore=None, fields=None):
def __init__(self, searcher, groupdocs, count=None, fields=None):
self.searcher, self.groupdocs = searcher, groupdocs
self.count, self.maxscore = count, maxscore
self.fields = fields
self.count, self.fields = count, fields

def __len__(self):
return len(self.groupdocs)

def __getitem__(self, index):
hits = groupdocs = self.groupdocs[index]
if isinstance(groupdocs, grouping.GroupDocs):
hits = Hits(self.searcher, groupdocs.scoreDocs, groupdocs.totalHits, groupdocs.maxScore)
hits = Hits(self.searcher, groupdocs.scoreDocs, groupdocs.totalHits)
hits.value = convert(groupdocs.groupValue)
hits.fields = self.fields
return hits
@@ -417,4 +420,4 @@ def search(self, searcher, query, count=None, start=0):
if count is None:
count = sum(index.DocValues.getSorted(reader, self.field).valueCount for reader in searcher.readers) or 1
topgroups = grouping.GroupingSearch.search(self, searcher, query, start, count - start)
return Groups(searcher, topgroups.groups, topgroups.totalHitCount, topgroups.maxScore)
return Groups(searcher, topgroups.groups, topgroups.totalHitCount)
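
The behavioral nuance of the new property is captured by its docstring: it is the max score of the hits present, not necessarily of all matches. A hedged illustration, assuming hits is a non-empty, relevance-sorted Hits result (e.g. indexer.search('people', count=5, field='text') as in the tests):

    import math

    # maxscore is computed lazily from whatever scoredocs this Hits object holds,
    # so a slice reports the max over the sliced hits only:
    top = hits[:3]
    assert top.maxscore == max(top.scores)
    assert top.maxscore <= hits.maxscore

    # an empty Hits object (a zero-length slice, or a timed-out search) reports NaN:
    assert math.isnan(hits[:0].maxscore)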
17 changes: 8 additions & 9 deletions lupyne/engine/indexers.py
@@ -371,7 +371,7 @@ def count(self, *query, **options):
query = self.parse(*query, **options) if query else Query.alldocs()
return super(IndexSearcher, self).count(query)

def collector(self, count=None, sort=None, reverse=False, scores=False, maxscore=False):
def collector(self, count=None, sort=None, reverse=False, scores=False):
if count is None:
return search.CachingCollector.create(True, float('inf'))
count = min(count, self.maxDoc() or 1)
@@ -381,24 +381,24 @@ def collector(self, count=None, sort=None, reverse=False, scores=False, maxscore
sort = self.sortfield(sort, reverse=reverse)
if not isinstance(sort, search.Sort):
sort = search.Sort(sort)
return search.TopFieldCollector.create(sort, count, True, scores, maxscore)
return search.TopFieldCollector.create(sort, count, True, scores, False)

def search(self, query=None, count=None, sort=None, reverse=False, scores=False, maxscore=False, timeout=None, **parser):
def search(self, query=None, count=None, sort=None, reverse=False, scores=False, timeout=None, **parser):
"""Run query and return `Hits`_.
.. versionchanged:: 1.4 sort param for lucene only; use Hits.sorted with a callable
.. versionchanged:: 1.4 sort param for lucene only; use Hits.sorted with a callable
.. versionchanged:: 2.3 maxscore option removed; use Hits.maxscore property
:param query: query string or lucene Query
:param count: maximum number of hits to retrieve
:param sort: lucene Sort parameters
:param reverse: reverse flag used with sort
:param scores: compute scores for candidate results when sorting
:param maxscore: compute maximum score of all results when sorting
:param timeout: stop search after elapsed number of seconds
:param parser: :meth:`Analyzer.parse` options
"""
query = Query.alldocs() if query is None else self.parse(query, **parser)
cache = collector = self.collector(count, sort, reverse, scores, maxscore)
cache = collector = self.collector(count, sort, reverse, scores)
counter = search.TimeLimitingCollector.getGlobalCounter()
results = collector if timeout is None else search.TimeLimitingCollector(collector, counter, long(timeout * 1000))
with suppress(search.TimeLimitingCollector.TimeExceededException):
@@ -407,11 +407,10 @@ def search(self, query=None, count=None, sort=None, reverse=False, scores=False,
if isinstance(cache, search.CachingCollector):
collector = search.TotalHitCountCollector()
cache.replay(collector)
collector = self.collector(collector.totalHits or 1, sort, reverse, scores, maxscore)
collector = self.collector(collector.totalHits or 1, sort, reverse, scores)
cache.replay(collector)
topdocs = collector.topDocs()
stats = (topdocs.totalHits, topdocs.maxScore) * (timeout is None)
return Hits(self, topdocs.scoreDocs, *stats)
return Hits(self, topdocs.scoreDocs, topdocs.totalHits if timeout is None else None)

def facets(self, query, *fields, **query_map):
"""Return mapping of document counts for the intersection with each facet.
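
For callers of IndexSearcher.search and collector, the maxscore keyword is simply gone: TopFieldCollector no longer tracks a maximum score, and the stats passed back to Hits keep only the total hit count. A migration sketch, assuming searcher is a lupyne engine.IndexSearcher (or Indexer) over a 'text' field; the sort mentioned in the comment would be a lucene Sort object:

    # Before this commit, a sorted search could request the max score up front:
    #   hits = searcher.search('text:people', count=5, sort=sort, scores=True, maxscore=True)
    # Now the keyword is removed; read the property off the result instead:
    hits = searcher.search('text:people', count=5)
    best = hits.maxscore   # max of the retrieved hits' scores; NaN if there are none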
21 changes: 10 additions & 11 deletions lupyne/server.py
@@ -366,6 +366,8 @@ def search(self, q=None, count: int = None, start: int = 0, fields: multi = None
facets: multi = '', group='', hl: multi = '', mlt: int = None, timeout: float = None, **options):
"""Run query and return documents.
.. versionchanged:: 2.3 maxscore option and result removed
**GET** /search?
Return array of document objects and total doc count.
@@ -378,9 +380,9 @@ def search(self, q=None, count: int = None, start: int = 0, fields: multi = None
&fields=\ *chars*,... &fields.multi=\ *chars*,... &fields.docvalues=\ *chars*\ [:*chars*],...
only include selected stored fields; multi-valued fields returned in an array; docvalues fields
&sort=\ [-]\ *chars*\ [:*chars*],... &sort.scores[=max]
&sort=\ [-]\ *chars*\ [:*chars*],... &sort.scores
| field name, optional type, minus sign indicates descending
| optionally score docs, additionally compute maximum score
| optionally score docs
&facets=\ *chars*,... &facets.count=\ *int*\&facets.min=0
| include facet counts for given field names
@@ -407,7 +409,6 @@ def search(self, q=None, count: int = None, start: int = 0, fields: multi = None
| {
| "query": *string*\|null,
| "count": *int*\|null,
| "maxscore": *number*\|null,
| "docs": [{"__id__": *int*, "__score__": *number*, "__keys__": *array*,
"__highlights__": {*string*: *array*,... }, *string*: *value*,... },... ],
| "facets": {*string*: {*string*: *int*,... },... },
@@ -431,22 +432,20 @@ def search(self, q=None, count: int = None, start: int = 0, fields: multi = None
count += start
if count == 0:
start = count = 1
scores = options.get('sort.scores')
scores = 'sort.scores' in options
gcount = options.get('group.count', 1)
scores = {'scores': scores is not None, 'maxscore': scores == 'max'}
if ':' in group:
hits = searcher.search(q, sort=sort, timeout=timeout, **scores)
hits = searcher.search(q, sort=sort, timeout=timeout, scores=scores)
name, docvalues = parse.docvalues(searcher, group)
with HTTPError(TypeError):
groups = hits.groupby(docvalues.select(hits.ids).__getitem__, count=count, docs=gcount)
groups.groupdocs = groups.groupdocs[start:]
elif group:
scores = {'includeScores': scores['scores'], 'includeMaxScore': scores['maxscore']}
groups = searcher.groupby(group, q, count, start=start, sort=sort, groupDocsLimit=gcount, **scores)
groups = searcher.groupby(group, q, count, start=start, sort=sort, groupDocsLimit=gcount, includeScores=scores)
else:
hits = searcher.search(q, sort=sort, count=count, timeout=timeout, **scores)
groups = engine.documents.Groups(searcher, [hits[start:]], hits.count, hits.maxscore)
result = {'query': q and str(q), 'count': groups.count, 'maxscore': groups.maxscore}
hits = searcher.search(q, sort=sort, count=count, timeout=timeout, scores=scores)
groups = engine.documents.Groups(searcher, [hits[start:]], hits.count)
result = {'query': q and str(q), 'count': groups.count}
fields, multi, docvalues = parse.fields(searcher, fields, **options)
if fields is None:
fields = {}
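
At the HTTP layer, the sort.scores=max form and the top-level maxscore key in the JSON response are both gone; per-document __score__ values are unchanged. A sketch against the /search endpoint, using the same resource client helper as the tests below:

    result = resource.search(q='text:people', count=5, sort='-year:int', **{'sort.scores': ''})
    assert 'maxscore' not in result                         # key removed from the response
    assert set(result) >= {'query', 'count', 'docs'}
    scores = [doc['__score__'] for doc in result['docs']]   # per-hit scores still present when requested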
6 changes: 3 additions & 3 deletions tests/test_engine.py
@@ -157,13 +157,13 @@ def test_searcher(tempdir, fields, constitution):
assert 'Preamble' in (hit.get('article') for hit in hits)
assert len(hits) == hits.count == 8
assert set(map(type, hits.ids)) == {int} and set(map(type, hits.scores)) == {float}
assert hits.maxscore == max(hits.scores)
assert hits.maxscore == next(hits.scores)
ids = list(hits.ids)
hits = indexer.search('people', count=5, field='text')
assert list(hits.ids) == ids[:len(hits)]
assert len(hits) == 5 and hits.count == 8
assert not any(map(math.isnan, hits.scores))
assert hits.maxscore == max(hits.scores)
assert hits.maxscore == next(hits.scores)
hits = indexer.search('text:people', count=5, sort=search.Sort.INDEXORDER)
assert sorted(hits.ids) == list(hits.ids)
hit, = indexer.search('freedom', field='text')
@@ -378,7 +378,7 @@ def test_grouping(tempdir, indexer, zipcodes):
assert len(grouping) == len(list(grouping)) > 100
assert set(grouping) > set(facets)
hits = indexer.search(query, timeout=-1)
assert not hits and (hits.count is hits.maxscore is None)
assert not hits and hits.count is None and math.isnan(hits.maxscore)
hits = indexer.search(query, timeout=10)
assert len(hits) == hits.count == indexer.count(query) and hits.maxscore == 1.0
directory = store.RAMDirectory()
13 changes: 5 additions & 8 deletions tests/test_server.py
@@ -116,7 +116,7 @@ def test_search(resource):
result = resource.search(q='Preamble', **{'q.field': 'article', 'q.type': 'prefix'})
assert result['count'] == 1 and result['query'] == 'article:Preamble*'
result = resource.search(q='text:"We the People"', **{'q.phraseSlop': 3})
assert 0 < result['maxscore'] and result['count'] == 1
assert result['count'] == 1
assert result['query'] == 'text:"we ? people"~3'
doc, = result['docs']
assert sorted(doc) == ['__id__', '__score__', 'article']
@@ -126,17 +126,14 @@ def test_search(resource):
assert sorted(docs, key=operator.itemgetter('__score__'), reverse=True) == docs
assert len(docs) == result['count'] == 8
result = resource.search(q='text:people', count=5)
maxscore = result['maxscore']
assert docs[:5] == result['docs'] and result['count'] == len(docs)
result = resource.search(q='text:people', count=5, sort='-year:int')
assert math.isnan(result['maxscore']) and all(math.isnan(doc['__score__']) for doc in result['docs'])
assert all(math.isnan(doc['__score__']) for doc in result['docs'])
assert result['docs'][0]['__keys__'] == [1913] and result['docs'][-1]['__keys__'] == [1791]
result = resource.search(q='text:people', sort='-year:int')
assert result['docs'][0]['__keys__'] == [1913] and result['docs'][-1]['__keys__'] == [0]
result = resource.search(q='text:people', count=5, sort='-year:int', **{'sort.scores': ''})
assert math.isnan(result['maxscore']) and maxscore in (doc['__score__'] for doc in result['docs'])
result = resource.search(q='text:people', count=1, sort='-year:int', **{'sort.scores': 'max'})
assert maxscore == result['maxscore'] and maxscore not in (doc['__score__'] for doc in result['docs'])
result = resource.search(q='text:people', count=5, sort='-date,year:int')
assert result['docs'][0]['__keys__'] == ['1913-04-08', 1913] and result['docs'][-1]['__keys__'] == ['1791-12-15', 1791]
result = resource.search(q='text:people', start=2, count=2, facets='date')
@@ -164,11 +161,11 @@ def test_highlights(resource):
assert result['count'] == 11 and set(result['query'].split()) == {'text:necessary', 'text:people'}
assert [doc['amendment'] for doc in result['docs'][:3]] == ['2', '9', '10']
result = resource.search(q='text:people', count=1, timeout=-1)
assert result == {'query': 'text:people', 'count': None, 'maxscore': None, 'docs': []}
assert result == {'query': 'text:people', 'count': None, 'docs': []}
result = resource.search(q='text:people', timeout=0.01)
assert result['count'] in (None, 8) and (result['maxscore'] is None or result['maxscore'] > 0)
assert result['count'] in (None, 8)
result = resource.search(q='+text:right +text:people')
assert result['count'] == 4 and 0 < result['maxscore']
assert result['count'] == 4
assert resource.search(q='hello', **{'q.field': 'body.title^2.0'})['query'] == '(body.title:hello)^2.0'


