PyLucene 7 supported.
coady committed Dec 28, 2017
1 parent 5e94bbc commit 4f2ac31
Showing 5 changed files with 60 additions and 36 deletions.
2 changes: 1 addition & 1 deletion README.rst
@@ -61,7 +61,7 @@ Changes
 ==================
 dev
 
-* PyLucene 6 required
+* PyLucene >=6 required
 * Python 3 support
 
 1.9
14 changes: 6 additions & 8 deletions lupyne/engine/indexers.py
@@ -17,7 +17,7 @@
 from six import string_types
 from six.moves import filter, map, range, zip
 from .analyzers import Analyzer
-from .queries import suppress, Query, DocValues, SpellParser
+from .queries import lucene6, suppress, Query, DocValues, SpellParser
 from .documents import Field, Document, Hits, GroupingSearch
 from ..utils import long, Atomic, SpellChecker
 
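Note: the lucene6 name imported here is a module-level flag added to lupyne/engine/queries.py later in this commit, and the remaining changes branch on it. A minimal sketch of the pattern, assuming only that PyLucene is importable (api_label is a hypothetical helper, not part of lupyne):

import lucene

# True when the PyLucene bindings wrap the Lucene 6.x line.
lucene6 = lucene.VERSION.startswith('6.')

def api_label():
    """Report which Lucene API generation the version-gated code paths will take."""
    return 'Lucene 6.x code path' if lucene6 else 'Lucene 7+ code path'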
@@ -219,7 +219,8 @@ def terms(self, name, value='', stop='', counts=False, distance=0, prefix=0):
             return iter([])
         term, termsenum = index.Term(name, value), terms.iterator()
         if distance:
-            terms = termsenum = search.FuzzyTermsEnum(terms, util.AttributeSource(), term, float(distance), prefix, False)
+            distance = (float if lucene6 else int)(distance)
+            terms = termsenum = search.FuzzyTermsEnum(terms, util.AttributeSource(), term, distance, prefix, False)
         else:
             termsenum.seekCeil(util.BytesRef(value))
             terms = itertools.chain([termsenum.term()], util.BytesRefIterator.cast_(termsenum))
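The added line coerces the fuzzy distance because the two Lucene lines type it differently: the old call passed float(distance) unconditionally, while on Lucene 7 FuzzyTermsEnum expects an integer edit distance. A minimal standalone sketch of that coercion, with coerce_distance as a hypothetical name:

def coerce_distance(distance, lucene6):
    """Return the fuzzy distance as the type the running Lucene expects."""
    # Lucene 6 accepted a float value; Lucene 7 takes an int number of edits.
    return (float if lucene6 else int)(distance)

assert coerce_distance(2, lucene6=True) == 2.0
assert coerce_distance('2', lucene6=False) == 2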
@@ -341,7 +342,7 @@ def spans(self, query, positions=False):
         :param positions: optionally include slice positions instead of counts
         """
         offset = 0
-        weight = query.createWeight(self, False)
+        weight = query.createWeight(self, False, *([1.0] * (not lucene6)))
         postings = search.spans.SpanWeight.Postings.POSITIONS
         for reader in self.readers:
             try:
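The splat idiom here passes a boost of 1.0 only when not running Lucene 6, reflecting the extra boost parameter that Query.createWeight takes on Lucene 7. A standalone sketch of how the optional argument list is built (createweight_args is a hypothetical helper):

def createweight_args(needs_scores, lucene6, boost=1.0):
    """Positional arguments for Query.createWeight, per Lucene version."""
    # Lucene 6: createWeight(searcher, needsScores); Lucene 7 adds a float boost.
    return (needs_scores,) + (() if lucene6 else (boost,))

assert createweight_args(False, lucene6=True) == (False,)
assert createweight_args(False, lucene6=False) == (False, 1.0)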
@@ -456,11 +457,8 @@ def match(self, document, *queries):
         """Generate scores for all queries against a given document mapping."""
         searcher = index.memory.MemoryIndex()
         for name, value in document.items():
-            if isinstance(value, string_types):
-                value = value, self.analyzer
-            elif isinstance(value, analysis.TokenStream):
-                value = value,
-            searcher.addField(name, *value)
+            args = [self.analyzer] * isinstance(value, string_types)
+            searcher.addField(name, value, *args)
         return (searcher.search(self.parse(query)) for query in queries)
 
 
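The rewritten match() relies on MemoryIndex.addField accepting either a plain string plus an analyzer or an already-analyzed TokenStream on its own, so the old branching and tuple packing collapse into one call. A Lucene-free sketch of the argument selection (addfield_args is a hypothetical name; the string check mirrors the six-based one above):

from six import string_types

def addfield_args(value, analyzer):
    """Trailing addField arguments: the analyzer is needed only for plain strings."""
    return [analyzer] * isinstance(value, string_types)

assert addfield_args('plain text', 'analyzer') == ['analyzer']
assert addfield_args(object(), 'analyzer') == []  # e.g. a TokenStream value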
67 changes: 49 additions & 18 deletions lupyne/engine/queries.py
@@ -13,6 +13,8 @@
 from six.moves import map, range
 from ..utils import method
 
+lucene6 = lucene.VERSION.startswith('6.')
+
 
 class Query(object):
     """Inherited lucene Query, with dynamic base class acquisition.
@@ -221,34 +223,63 @@ def suppress(exception):
             raise
 
 
-class DocValues:
-    """DocValues with type conversion."""
-    class Numeric(object):
-        def __init__(self, docvalues, size, type):
-            self.docvalues, self.size, self.type = docvalues, size, type
+class Base(object):
+    def __init__(self, docvalues, size, type):
+        self.docvalues, self.size, self.type = docvalues, size, type
+
+    def __iter__(self):
+        return map(self.__getitem__, range(self.size))
 
-        def __iter__(self):
-            return map(self.__getitem__, range(self.size))
+    def select(self, ids):
+        """Return mapping of doc ids to values."""
+        return {id: self[id] for id in sorted(ids)}
 
+
+class DocValues:  # pragma: no cover
+    """DocValues with type conversion."""
+    class Numeric(Base):
         def __getitem__(self, id):
-            return self.type(self.docvalues.get(id))
+            if self.docvalues.advanceExact(id):
+                return self.type(self.docvalues.longValue())
 
-        def select(self, ids):
-            """Return mapping of doc ids to values."""
-            return {id: self[id] for id in sorted(ids)}
+    class Binary(Numeric):
+        def __getitem__(self, id):
+            if self.docvalues.advanceExact(id):
+                return self.type(self.docvalues.binaryValue())
 
-    Binary = Sorted = Numeric
+    Sorted = Binary
 
-    class SortedNumeric(Numeric):
+    class SortedNumeric(Base):
         def __getitem__(self, id):
-            self.docvalues.document = id
-            return tuple(self.type(self.docvalues.valueAt(index)) for index in range(self.docvalues.count()))
+            if self.docvalues.advanceExact(id):
+                return tuple(self.type(self.docvalues.nextValue()) for _ in range(self.docvalues.docValueCount()))
 
-    class SortedSet(Sorted):
+    class SortedSet(Base):
         def __getitem__(self, id):
-            self.docvalues.document = id
             ords = iter(self.docvalues.nextOrd, self.docvalues.NO_MORE_ORDS)
-            return tuple(self.type(self.docvalues.lookupOrd(ord)) for ord in ords)
+            if self.docvalues.advanceExact(id):
+                return tuple(self.type(self.docvalues.lookupOrd(ord)) for ord in ords)
 
 
+if lucene6:  # pragma: no cover
+    class DocValues:  # noqa
+        """DocValues with type conversion."""
+        class Numeric(Base):
+            def __getitem__(self, id):
+                return self.type(self.docvalues.get(id))
+
+        Binary = Sorted = Numeric
+
+        class SortedNumeric(Base):
+            def __getitem__(self, id):
+                self.docvalues.document = id
+                return tuple(self.type(self.docvalues.valueAt(index)) for index in range(self.docvalues.count()))
+
+        class SortedSet(Base):
+            def __getitem__(self, id):
+                self.docvalues.document = id
+                ords = iter(self.docvalues.nextOrd, self.docvalues.NO_MORE_ORDS)
+                return tuple(self.type(self.docvalues.lookupOrd(ord)) for ord in ords)
+
+
 class SpellParser(PythonQueryParser):
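This restructuring is the core of the PyLucene 7 port: doc values move from random access (get(id), or setting document and reading valueAt/nextOrd) to an iterator-style API where advanceExact(id) positions the cursor before longValue, binaryValue, nextValue, or nextOrd are read. The shared Base keeps iteration and select in one place, while the two DocValues definitions wrap whichever API the installed Lucene provides. A Lucene-free sketch of the same dispatch, with FakeNumeric as a hypothetical stand-in for a NumericDocValues object (Python 3):

class Base(object):
    def __init__(self, docvalues, size, type):
        self.docvalues, self.size, self.type = docvalues, size, type

    def __iter__(self):
        return map(self.__getitem__, range(self.size))


class Numeric7(Base):
    """Iterator-style access: position on the doc, then read its value."""
    def __getitem__(self, id):
        if self.docvalues.advanceExact(id):
            return self.type(self.docvalues.longValue())


class Numeric6(Base):
    """Random access: read the value for any doc id directly."""
    def __getitem__(self, id):
        return self.type(self.docvalues.get(id))


class FakeNumeric(object):
    """Hypothetical stand-in exposing both access styles over a dict."""
    def __init__(self, values):
        self.values, self.current = values, None

    def advanceExact(self, id):  # Lucene 7 style
        self.current = id
        return id in self.values

    def longValue(self):
        return self.values[self.current]

    def get(self, id):  # Lucene 6 style: missing docs read as 0
        return self.values.get(id, 0)


docvalues = FakeNumeric({0: 3, 2: 7})
assert list(Numeric7(docvalues, 3, int)) == [3, None, 7]
assert list(Numeric6(docvalues, 3, int)) == [3, 0, 7]

In this sketch a document with no value comes back as None from the Lucene 7-style wrapper and as the stand-in's default 0 from the Lucene 6-style one, which is why both wrappers are kept behind the version flag.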
10 changes: 3 additions & 7 deletions tests/test_engine.py
@@ -129,13 +129,9 @@ def test_searcher(tempdir, fields, constitution):
     assert reader.refCount == 0
     assert list(indexer.spellcheckers) == ['amendment']
     analyzer = engine.Analyzer.standard()
-    doc = {
-        'text': doc['text'],
-        'amendment': analyzer.tokens(doc['amendment']),
-        'date': (analyzer.tokens(doc['date']), 2.0),
-    }
-    scores = list(searcher.match(doc, 'text:congress', 'text:law', 'amendment:27', 'date:19*'))
-    assert 0.0 == scores[0] < scores[1] < scores[2] < scores[3] == 1.0
+    doc = {'text': doc['text'], 'amendment': analyzer.tokens(doc['amendment'])}
+    scores = list(searcher.match(doc, 'text:congress', 'text:law', 'amendment:27'))
+    assert 0.0 == scores[0] < scores[1] <= scores[2] < 1.0
     assert len(indexer) == len(indexer.search()) == 35
     articles = list(indexer.terms('article'))
     articles.remove('Preamble')
3 changes: 1 addition & 2 deletions tests/test_server.py
@@ -140,7 +140,6 @@ def test_search(resource):
     result = resource.search(q='text:people', count=5, sort='-date,year:int')
     assert result['docs'][0]['__keys__'] == ['1913-04-08', 1913] and result['docs'][-1]['__keys__'] == ['1791-12-15', 1791]
     result = resource.search(q='text:people', start=2, count=2, facets='date')
-    assert [doc['amendment'] for doc in result['docs']] == ['10', '1']
     assert result['count'] == 8 and result['facets']['date'] == {'1791-12-15': 5, '1913-04-08': 1}
     result = resource.search(q='text:president', facets='date')
     assert len(result['facets']['date']) == sum(result['facets']['date'].values()) == 7
@@ -163,7 +162,7 @@ def test_highlights(resource):
     assert result['count'] == 25 and set(result['query'].split()) == {'text:united', 'text:states'}
     result = resource.search(q='amendment:2', mlt=0, **{'mlt.fields': 'text', 'mlt.minTermFreq': 1, 'mlt.minWordLen': 6})
     assert result['count'] == 11 and set(result['query'].split()) == {'text:necessary', 'text:people'}
-    assert [doc['amendment'] for doc in result['docs'][:4]] == ['2', '9', '10', '1']
+    assert [doc['amendment'] for doc in result['docs'][:3]] == ['2', '9', '10']
     result = resource.search(q='text:people', count=1, timeout=-1)
     assert result == {'query': 'text:people', 'count': None, 'maxscore': None, 'docs': []}
     result = resource.search(q='text:people', timeout=0.01)
