PyLucene 7 supported.
coady committed Dec 28, 2017
1 parent 5e94bbc commit 4f2ac31
Showing 5 changed files with 60 additions and 36 deletions.
2 changes: 1 addition & 1 deletion README.rst
@@ -61,7 +61,7 @@ Changes
 ==================
 dev
 
-* PyLucene 6 required
+* PyLucene >=6 required
 * Python 3 support
 
 1.9
14 changes: 6 additions & 8 deletions lupyne/engine/indexers.py
@@ -17,7 +17,7 @@
 from six import string_types
 from six.moves import filter, map, range, zip
 from .analyzers import Analyzer
-from .queries import suppress, Query, DocValues, SpellParser
+from .queries import lucene6, suppress, Query, DocValues, SpellParser
 from .documents import Field, Document, Hits, GroupingSearch
 from ..utils import long, Atomic, SpellChecker
 
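Note: the lucene6 name imported here is a module-level flag added to lupyne/engine/queries.py later in this commit, and the remaining changes branch on it. A minimal sketch of the pattern, assuming only that PyLucene is importable (api_label is a hypothetical helper, not part of lupyne):

import lucene

# True when the PyLucene bindings wrap the Lucene 6.x line.
lucene6 = lucene.VERSION.startswith('6.')

def api_label():
    """Report which Lucene API generation the version-gated code paths will take."""
    return 'Lucene 6.x code path' if lucene6 else 'Lucene 7+ code path'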
@@ -219,7 +219,8 @@ def terms(self, name, value='', stop='', counts=False, distance=0, prefix=0):
             return iter([])
         term, termsenum = index.Term(name, value), terms.iterator()
         if distance:
-            terms = termsenum = search.FuzzyTermsEnum(terms, util.AttributeSource(), term, float(distance), prefix, False)
+            distance = (float if lucene6 else int)(distance)
+            terms = termsenum = search.FuzzyTermsEnum(terms, util.AttributeSource(), term, distance, prefix, False)
         else:
             termsenum.seekCeil(util.BytesRef(value))
             terms = itertools.chain([termsenum.term()], util.BytesRefIterator.cast_(termsenum))
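The added line coerces the fuzzy distance because the two Lucene lines type it differently: the old call passed float(distance) unconditionally, while on Lucene 7 FuzzyTermsEnum expects an integer edit distance. A minimal standalone sketch of that coercion, with coerce_distance as a hypothetical name:

def coerce_distance(distance, lucene6):
    """Return the fuzzy distance as the type the running Lucene expects."""
    # Lucene 6 accepted a float value; Lucene 7 takes an int number of edits.
    return (float if lucene6 else int)(distance)

assert coerce_distance(2, lucene6=True) == 2.0
assert coerce_distance('2', lucene6=False) == 2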
@@ -341,7 +342,7 @@ def spans(self, query, positions=False):
         :param positions: optionally include slice positions instead of counts
         """
         offset = 0
-        weight = query.createWeight(self, False)
+        weight = query.createWeight(self, False, *([1.0] * (not lucene6)))
         postings = search.spans.SpanWeight.Postings.POSITIONS
         for reader in self.readers:
             try:
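The splat idiom here passes a boost of 1.0 only when not running Lucene 6, reflecting the extra boost parameter that Query.createWeight takes on Lucene 7. A standalone sketch of how the optional argument list is built (createweight_args is a hypothetical helper):

def createweight_args(needs_scores, lucene6, boost=1.0):
    """Positional arguments for Query.createWeight, per Lucene version."""
    # Lucene 6: createWeight(searcher, needsScores); Lucene 7 adds a float boost.
    return (needs_scores,) + (() if lucene6 else (boost,))

assert createweight_args(False, lucene6=True) == (False,)
assert createweight_args(False, lucene6=False) == (False, 1.0)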
@@ -456,11 +457,8 @@ def match(self, document, *queries):
         """Generate scores for all queries against a given document mapping."""
         searcher = index.memory.MemoryIndex()
         for name, value in document.items():
-            if isinstance(value, string_types):
-                value = value, self.analyzer
-            elif isinstance(value, analysis.TokenStream):
-                value = value,
-            searcher.addField(name, *value)
+            args = [self.analyzer] * isinstance(value, string_types)
+            searcher.addField(name, value, *args)
         return (searcher.search(self.parse(query)) for query in queries)
 
 
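The rewritten match() relies on MemoryIndex.addField accepting either a plain string plus an analyzer or an already-analyzed TokenStream on its own, so the old branching and tuple packing collapse into one call. A Lucene-free sketch of the argument selection (addfield_args is a hypothetical name; the string check mirrors the six-based one above):

from six import string_types

def addfield_args(value, analyzer):
    """Trailing addField arguments: the analyzer is needed only for plain strings."""
    return [analyzer] * isinstance(value, string_types)

assert addfield_args('plain text', 'analyzer') == ['analyzer']
assert addfield_args(object(), 'analyzer') == []  # e.g. a TokenStream value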
67 changes: 49 additions & 18 deletions lupyne/engine/queries.py
@@ -13,6 +13,8 @@
 from six.moves import map, range
 from ..utils import method
 
+lucene6 = lucene.VERSION.startswith('6.')
+
 
 class Query(object):
     """Inherited lucene Query, with dynamic base class acquisition.
@@ -221,34 +223,63 @@ def suppress(exception):
             raise
 
 
-class DocValues:
-    """DocValues with type conversion."""
-    class Numeric(object):
-        def __init__(self, docvalues, size, type):
-            self.docvalues, self.size, self.type = docvalues, size, type
+class Base(object):
+    def __init__(self, docvalues, size, type):
+        self.docvalues, self.size, self.type = docvalues, size, type
+
+    def __iter__(self):
+        return map(self.__getitem__, range(self.size))
 
-        def __iter__(self):
-            return map(self.__getitem__, range(self.size))
+    def select(self, ids):
+        """Return mapping of doc ids to values."""
+        return {id: self[id] for id in sorted(ids)}
 
+
+class DocValues:  # pragma: no cover
+    """DocValues with type conversion."""
+    class Numeric(Base):
         def __getitem__(self, id):
-            return self.type(self.docvalues.get(id))
+            if self.docvalues.advanceExact(id):
+                return self.type(self.docvalues.longValue())
 
-        def select(self, ids):
-            """Return mapping of doc ids to values."""
-            return {id: self[id] for id in sorted(ids)}
+    class Binary(Numeric):
+        def __getitem__(self, id):
+            if self.docvalues.advanceExact(id):
+                return self.type(self.docvalues.binaryValue())
 
-    Binary = Sorted = Numeric
+    Sorted = Binary
 
-    class SortedNumeric(Numeric):
+    class SortedNumeric(Base):
         def __getitem__(self, id):
-            self.docvalues.document = id
-            return tuple(self.type(self.docvalues.valueAt(index)) for index in range(self.docvalues.count()))
+            if self.docvalues.advanceExact(id):
+                return tuple(self.type(self.docvalues.nextValue()) for _ in range(self.docvalues.docValueCount()))
 
-    class SortedSet(Sorted):
+    class SortedSet(Base):
         def __getitem__(self, id):
-            self.docvalues.document = id
             ords = iter(self.docvalues.nextOrd, self.docvalues.NO_MORE_ORDS)
-            return tuple(self.type(self.docvalues.lookupOrd(ord)) for ord in ords)
+            if self.docvalues.advanceExact(id):
+                return tuple(self.type(self.docvalues.lookupOrd(ord)) for ord in ords)
 
 
+if lucene6:  # pragma: no cover
+    class DocValues:  # noqa
+        """DocValues with type conversion."""
+        class Numeric(Base):
+            def __getitem__(self, id):
+                return self.type(self.docvalues.get(id))
+
+        Binary = Sorted = Numeric
+
+        class SortedNumeric(Base):
+            def __getitem__(self, id):
+                self.docvalues.document = id
+                return tuple(self.type(self.docvalues.valueAt(index)) for index in range(self.docvalues.count()))
+
+        class SortedSet(Base):
+            def __getitem__(self, id):
+                self.docvalues.document = id
+                ords = iter(self.docvalues.nextOrd, self.docvalues.NO_MORE_ORDS)
+                return tuple(self.type(self.docvalues.lookupOrd(ord)) for ord in ords)
+
+
 class SpellParser(PythonQueryParser):
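This restructuring is the core of the PyLucene 7 port: doc values move from random access (get(id), or setting document and reading valueAt/nextOrd) to an iterator-style API where advanceExact(id) positions the cursor before longValue, binaryValue, nextValue, or nextOrd are read. The shared Base keeps iteration and select in one place, while the two DocValues definitions wrap whichever API the installed Lucene provides. A Lucene-free sketch of the same dispatch, with FakeNumeric as a hypothetical stand-in for a NumericDocValues object (Python 3):

class Base(object):
    def __init__(self, docvalues, size, type):
        self.docvalues, self.size, self.type = docvalues, size, type

    def __iter__(self):
        return map(self.__getitem__, range(self.size))


class Numeric7(Base):
    """Iterator-style access: position on the doc, then read its value."""
    def __getitem__(self, id):
        if self.docvalues.advanceExact(id):
            return self.type(self.docvalues.longValue())


class Numeric6(Base):
    """Random access: read the value for any doc id directly."""
    def __getitem__(self, id):
        return self.type(self.docvalues.get(id))


class FakeNumeric(object):
    """Hypothetical stand-in exposing both access styles over a dict."""
    def __init__(self, values):
        self.values, self.current = values, None

    def advanceExact(self, id):  # Lucene 7 style
        self.current = id
        return id in self.values

    def longValue(self):
        return self.values[self.current]

    def get(self, id):  # Lucene 6 style: missing docs read as 0
        return self.values.get(id, 0)


docvalues = FakeNumeric({0: 3, 2: 7})
assert list(Numeric7(docvalues, 3, int)) == [3, None, 7]
assert list(Numeric6(docvalues, 3, int)) == [3, 0, 7]

In this sketch a document with no value comes back as None from the Lucene 7-style wrapper and as the stand-in's default 0 from the Lucene 6-style one, which is why both wrappers are kept behind the version flag.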
10 changes: 3 additions & 7 deletions tests/test_engine.py
@@ -129,13 +129,9 @@ def test_searcher(tempdir, fields, constitution):
     assert reader.refCount == 0
     assert list(indexer.spellcheckers) == ['amendment']
     analyzer = engine.Analyzer.standard()
-    doc = {
-        'text': doc['text'],
-        'amendment': analyzer.tokens(doc['amendment']),
-        'date': (analyzer.tokens(doc['date']), 2.0),
-    }
-    scores = list(searcher.match(doc, 'text:congress', 'text:law', 'amendment:27', 'date:19*'))
-    assert 0.0 == scores[0] < scores[1] < scores[2] < scores[3] == 1.0
+    doc = {'text': doc['text'], 'amendment': analyzer.tokens(doc['amendment'])}
+    scores = list(searcher.match(doc, 'text:congress', 'text:law', 'amendment:27'))
+    assert 0.0 == scores[0] < scores[1] <= scores[2] < 1.0
     assert len(indexer) == len(indexer.search()) == 35
     articles = list(indexer.terms('article'))
     articles.remove('Preamble')
3 changes: 1 addition & 2 deletions tests/test_server.py
@@ -140,7 +140,6 @@ def test_search(resource):
     result = resource.search(q='text:people', count=5, sort='-date,year:int')
     assert result['docs'][0]['__keys__'] == ['1913-04-08', 1913] and result['docs'][-1]['__keys__'] == ['1791-12-15', 1791]
     result = resource.search(q='text:people', start=2, count=2, facets='date')
-    assert [doc['amendment'] for doc in result['docs']] == ['10', '1']
     assert result['count'] == 8 and result['facets']['date'] == {'1791-12-15': 5, '1913-04-08': 1}
     result = resource.search(q='text:president', facets='date')
     assert len(result['facets']['date']) == sum(result['facets']['date'].values()) == 7
@@ -163,7 +162,7 @@ def test_highlights(resource):
     assert result['count'] == 25 and set(result['query'].split()) == {'text:united', 'text:states'}
     result = resource.search(q='amendment:2', mlt=0, **{'mlt.fields': 'text', 'mlt.minTermFreq': 1, 'mlt.minWordLen': 6})
     assert result['count'] == 11 and set(result['query'].split()) == {'text:necessary', 'text:people'}
-    assert [doc['amendment'] for doc in result['docs'][:4]] == ['2', '9', '10', '1']
+    assert [doc['amendment'] for doc in result['docs'][:3]] == ['2', '9', '10']
     result = resource.search(q='text:people', count=1, timeout=-1)
     assert result == {'query': 'text:people', 'count': None, 'maxscore': None, 'docs': []}
     result = resource.search(q='text:people', timeout=0.01)
