Take up the black.
coady committed Jun 23, 2019
1 parent fcf60c9 commit ad465e1
Showing 19 changed files with 118 additions and 46 deletions.
1 change: 1 addition & 0 deletions Makefile
@@ -13,6 +13,7 @@ dist: html

check:
python3 setup.py $@ -ms
+black --check -q .
flake8
python3 -m examples
pytest-2.7 tests/test_engine.py --cov=lupyne.engine --cov-fail-under=100
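For context: `black --check` rewrites nothing and simply exits non-zero when any file would be reformatted, while `-q` suppresses the per-file report, so the `check` target fails fast in CI. A minimal sketch of the same gate through black's Python API (`format_str` and `FileMode` exist as of black 19.3b0; the target path and the mode settings here are assumptions, not the project's recorded configuration):

import sys

import black

# Hypothetical check: re-format one file in memory and compare against disk.
path = 'examples/grouping.py'
source = open(path).read()
mode = black.FileMode(line_length=150, string_normalization=False)  # assumed settings
formatted = black.format_str(source, mode=mode)
sys.exit(int(formatted != source))  # non-zero exit mirrors `black --check`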
1 change: 1 addition & 0 deletions README.md
@@ -5,6 +5,7 @@
[![image](https://api.shippable.com/projects/56059e3e1895ca4474182ec3/badge?branch=master)](https://app.shippable.com/github/coady/lupyne)
[![image](https://api.shippable.com/projects/56059e3e1895ca4474182ec3/coverageBadge?branch=master)](https://app.shippable.com/github/coady/lupyne)
[![image](https://requires.io/github/coady/lupyne/requirements.svg)](https://requires.io/github/coady/lupyne/requirements/)
+[![image](https://img.shields.io/badge/code%20style-black-000000.svg)](https://pypi.org/project/black/)

Lupyne is a search engine based on [PyLucene](http://lucene.apache.org/pylucene/), the Python extension for accessing Java Lucene.
Lucene is a relatively low-level toolkit, and PyLucene wraps it through automatic code generation.
6 changes: 2 additions & 4 deletions examples/grouping.py
@@ -14,6 +14,7 @@
import itertools
import lucene
from lupyne import engine
+
assert lucene.getVMEnv() or lucene.initVM()

colors = 'red', 'green', 'blue', 'cyan', 'magenta', 'yellow'
@@ -43,8 +44,5 @@
assert indexer.facets(query, 'color')['color'] == facets

# queries allow flexible customizations without any indexing changes
-queries = {
-    'additive': engine.Query.any(color=colors[:3]),
-    'subtractive': engine.Query.any(color=colors[3:]),
-}
+queries = {'additive': engine.Query.any(color=colors[:3]), 'subtractive': engine.Query.any(color=colors[3:])}
assert indexer.facets(query, color=queries)['color'] == {'additive': 6, 'subtractive': 15}
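Both hunks above are characteristic black output. The new blank line separates the import block from the first statement, and the exploded dict is rejoined because it fits within the configured line length; 2019-era black had no magic trailing comma, so the trailing comma alone did not keep it multi-line. The surviving single quotes also suggest string normalization is disabled (black's `-S` flag). A sketch of the collapse under those assumed settings:

import black

src = "queries = {\n    'additive': 1,\n    'subtractive': 2,\n}\n"
mode = black.FileMode(line_length=150, string_normalization=False)  # assumed settings
print(black.format_str(src, mode=mode), end='')
# under 2019-era black prints: queries = {'additive': 1, 'subtractive': 2}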
13 changes: 7 additions & 6 deletions examples/indexers.py
@@ -5,6 +5,7 @@
import lucene
from org.apache.lucene import analysis, document, index, queryparser, search, store
from lupyne import engine
+
assert lucene.getVMEnv() or lucene.initVM()

# # # lucene # # #
@@ -41,14 +42,14 @@
# # # lupyne # # #

# Store the index in memory:
-indexer = engine.Indexer()    # Indexer combines Writer and Searcher; RAMDirectory and StandardAnalyzer are defaults
-indexer.set('fieldname', engine.Field.Text, stored=True)    # default indexed text settings for documents
-indexer.add(fieldname=text)    # add document
-indexer.commit()    # commit changes and refresh searcher
+indexer = engine.Indexer()  # Indexer combines Writer and Searcher; RAMDirectory and StandardAnalyzer are defaults
+indexer.set('fieldname', engine.Field.Text, stored=True)  # default indexed text settings for documents
+indexer.add(fieldname=text)  # add document
+indexer.commit()  # commit changes and refresh searcher

# Now search the index:
-hits = indexer.search('text', field='fieldname')    # parsing handled if necessary
+hits = indexer.search('text', field='fieldname')  # parsing handled if necessary
assert len(hits) == 1
-for hit in hits:    # hits support mapping interface
+for hit in hits:  # hits support mapping interface
assert hit['fieldname'] == text
# closing is handled automatically
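The whitespace-only churn above is black normalizing inline comments: alignment padding collapses to exactly two spaces before the `#`. A one-line demonstration (assuming black is installed; the snippet is only parsed, never executed):

import black

src = "indexer.add(fieldname=text)        # add document\n"
print(black.format_str(src, mode=black.FileMode()), end='')
# prints: indexer.add(fieldname=text)  # add document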
1 change: 1 addition & 0 deletions examples/queries.py
@@ -8,6 +8,7 @@
from org.apache.lucene import index, search
from org.apache.lucene.search import spans
from lupyne.engine import Query
+
assert lucene.getVMEnv() or lucene.initVM()

# # # lucene # # #
1 change: 1 addition & 0 deletions examples/searching.py
@@ -24,6 +24,7 @@
from datetime import date
import lucene
from lupyne import engine
+
assert lucene.getVMEnv() or lucene.initVM()
Q = engine.Query

1 change: 1 addition & 0 deletions examples/server.py
@@ -15,6 +15,7 @@
import lucene
from lupyne import engine, server
from tests import conftest
+
Q = engine.Query


1 change: 1 addition & 0 deletions examples/sorting.py
@@ -21,6 +21,7 @@
import lucene
from org.apache.lucene import search
from lupyne import engine
+
assert lucene.getVMEnv() or lucene.initVM()

colors = 'red', 'green', 'blue', 'cyan', 'magenta', 'yellow'
5 changes: 5 additions & 0 deletions lupyne/engine/analyzers.py
@@ -7,6 +7,7 @@
from org.apache.pylucene.queryparser.classic import PythonQueryParser
from six import string_types
from .utils import method
+
try:
from typing import Mapping
except ImportError: # pragma: no cover
@@ -15,6 +16,7 @@

class TokenStream(analysis.TokenStream):
"""TokenStream mixin with support for iteration and attributes cached as properties."""
+
def __iter__(self):
self.reset()
return self
@@ -23,6 +25,7 @@ def __next__(self):
if self.incrementToken():
return self
raise StopIteration
+
next = __next__

def __getattr__(self, name):
@@ -84,6 +87,7 @@ class TokenFilter(PythonTokenFilter, TokenStream):
Subclass and override :meth:`incrementToken`.
"""
+
def __init__(self, input):
PythonTokenFilter.__init__(self, input)
self.input = input
@@ -99,6 +103,7 @@ class Analyzer(PythonAnalyzer):
:param tokenizer: lucene Tokenizer class or callable, called with no args
:param filters: lucene TokenFilter classes or callables, successively called on input tokens
"""
+
def __init__(self, tokenizer, *filters):
PythonAnalyzer.__init__(self)
self.tokenizer, self.filters = tokenizer, filters
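Nearly every addition in this file, and most of the one-line additions in the modules below, are the same mechanical change: black inserts an empty line between a class docstring and the first member. A sketch:

import black

src = 'class TokenStream:\n    """A stream."""\n    def __iter__(self):\n        return self\n'
print(black.format_str(src, mode=black.FileMode()), end='')
# prints the class back with a blank line inserted after the docstring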
24 changes: 18 additions & 6 deletions lupyne/engine/documents.py
@@ -10,6 +10,7 @@
from six.moves import map, range
from .queries import Query
from .utils import convert, long
+
FieldType = document.FieldType


@@ -20,6 +21,7 @@ class Field(FieldType):
:param boost: boost factor
:param stored, indexed, settings: lucene FieldType attributes
"""
+
docValuesType = property(FieldType.docValuesType, FieldType.setDocValuesType)
indexOptions = property(FieldType.indexOptions, FieldType.setIndexOptions)
omitNorms = property(FieldType.omitNorms, FieldType.setOmitNorms)
@@ -33,8 +35,10 @@ class Field(FieldType):
properties = {name for name in locals() if not name.startswith('__')}
types = {int: 'long', float: 'double'}
types.update(NUMERIC='long', BINARY='string', SORTED='string', SORTED_NUMERIC='long', SORTED_SET='string')
-dimensions = property(getattr(FieldType, 'pointDataDimensionCount', getattr(FieldType, 'pointDimensionCount', None)),
-                      lambda self, count: self.setDimensions(count, Long.BYTES))
+dimensions = property(
+    getattr(FieldType, 'pointDataDimensionCount', getattr(FieldType, 'pointDimensionCount', None)),
+    lambda self, count: self.setDimensions(count, Long.BYTES),
+)

def __init__(self, name, docValuesType='', indexOptions='', dimensions=0, **settings):
super(Field, self).__init__()
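The `dimensions` rewrite above shows black's wrapping rule: a call that overflows the line length is exploded to one argument per line with a hanging indent, a dedented closing parenthesis, and a trailing comma so later additions stay one line per argument. A sketch with an artificially short line length of 60 to force the split:

import black

src = "dimensions = property(first_long_argument_expression, second_long_argument_expression)\n"
print(black.format_str(src, mode=black.FileMode(line_length=60)), end='')
# prints:
# dimensions = property(
#     first_long_argument_expression,
#     second_long_argument_expression,
# )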
@@ -50,7 +54,7 @@ def __init__(self, name, docValuesType='', indexOptions='', dimensions=0, **sett
if docValuesType:
self.docValuesType = getattr(index.DocValuesType, docValuesType.upper())
self.docValueClass = getattr(document, docValuesType.title().replace('_', '') + 'DocValuesField')
-if (self.stored or self.indexed or self.dimensions):
+if self.stored or self.indexed or self.dimensions:
settings = self.settings
del settings['docValuesType']
self.docValueLess = Field(self.name, **settings)
@@ -113,6 +117,7 @@ class NestedField(Field):
:param sep: field separator used on name and values
"""
+
def __init__(self, name, sep='.', **settings):
Field.__init__(self, name, **Field.String(name, **settings).settings)
self.sep = sep
@@ -149,6 +154,7 @@ class DateTimeField(Field):
Supports datetimes, dates, and any prefix of time tuples.
"""
+
def __init__(self, name, dimensions=1, **settings):
Field.__init__(self, name, dimensions=dimensions, **settings)

@@ -157,7 +163,7 @@ def timestamp(cls, date):
"""Return utc timestamp from date or time tuple."""
if isinstance(date, datetime.date):
return calendar.timegm(date.timetuple()) + getattr(date, 'microsecond', 0) * 1e-6
-return float(calendar.timegm(tuple(date) + (None, 1, 1, 0, 0, 0)[len(date):]))
+return float(calendar.timegm(tuple(date) + (None, 1, 1, 0, 0, 0)[len(date) :]))

def items(self, *dates):
"""Generate lucene NumericFields of timestamps."""
@@ -171,7 +177,7 @@ def range(self, start, stop, **inclusive):
def prefix(self, date):
"""Return range query which matches the date prefix."""
if isinstance(date, datetime.date):
-date = date.timetuple()[:6 if isinstance(date, datetime.datetime) else 3]
+date = date.timetuple()[: 6 if isinstance(date, datetime.datetime) else 3]
if len(date) == 2 and date[1] == 12: # month must be valid
return self.range(date, (date[0] + 1, 1))
return self.range(date, tuple(date[:-1]) + (date[-1] + 1,))
@@ -183,7 +189,7 @@ def duration(self, date, days=0, **delta):
:param days,delta: timedelta parameters
"""
if not isinstance(date, datetime.date):
-date = datetime.datetime(*(tuple(date) + (None, 1, 1)[len(date):]))
+date = datetime.datetime(*(tuple(date) + (None, 1, 1)[len(date) :]))
delta = datetime.timedelta(days, **delta)
return self.range(*sorted([date, date + delta]), upper=True)

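The `[len(date) :]` and `[: 6 ...]` spellings in the three hunks above follow PEP 8's slice rule, which black enforces: when a slice bound is an expression rather than a simple name or literal, the colon is treated like a binary operator and given a space on each side. A sketch:

import black

src = "suffix = (None, 1, 1, 0, 0, 0)[len(date):]\n"
print(black.format_str(src, mode=black.FileMode()), end='')
# prints: suffix = (None, 1, 1, 0, 0, 0)[len(date) :]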
@@ -204,6 +210,7 @@ def within(self, days=0, weeks=0, utc=True, **delta):

class SpatialField(Field):
"""Geospatial points, indexed with optional docvalues."""
+
def __init__(self, name, dimensions=1, **settings):
Field.__init__(self, name, dimensions=dimensions, **settings)

@@ -230,6 +237,7 @@ def distances(self, lng, lat):

class Document(dict):
"""Multimapping of field names to values, but default getters return the first value."""
+
def __init__(self, doc):
for field in doc.iterator():
value = convert(field.numericValue() or field.stringValue() or field.binaryValue())
@@ -258,6 +266,7 @@ def dict(self, *names, **defaults):

class Hit(Document):
"""A Document from a search result, with :attr:`id`, :attr:`score`, and optional sort :attr:`keys`."""
+
def __init__(self, doc, id, score, keys=()):
Document.__init__(self, doc)
self.id, self.score = id, score
@@ -283,6 +292,7 @@ class Hits(object):
:param count: total number of hits; float indicates estimate
:param fields: optional field selectors
"""
+
def __init__(self, searcher, scoredocs, count=0, fields=None):
self.searcher, self.scoredocs = searcher, scoredocs
if hasattr(count, 'relation'): # pragma: no cover
@@ -370,6 +380,7 @@ def sorted(self, key, reverse=False):

class Groups(object):
"""Sequence of grouped `Hits`_."""
+
select = Hits.__dict__['select']

def __init__(self, searcher, groupdocs, count=0, fields=None):
@@ -401,6 +412,7 @@ class GroupingSearch(grouping.GroupingSearch):
:param cache: use unlimited caching
:param attrs: additional attributes to set
"""
+
def __init__(self, field, sort=None, cache=True, **attrs):
grouping.GroupingSearch.__init__(self, field)
self.field = field
10 changes: 8 additions & 2 deletions lupyne/engine/indexers.py
@@ -20,6 +20,7 @@

class closing(set):
"""Manage lifespan of registered objects, similar to contextlib.closing."""
+
def __del__(self):
for obj in self:
obj.close()
@@ -82,6 +83,7 @@ class IndexReader(object):
:param reader: lucene IndexReader
"""
+
def __init__(self, reader):
self.indexReader = reader

@@ -236,7 +238,7 @@ def positions(self, name, value, payloads=False, offsets=False):
"""Generate doc ids and positions which contain given term, optionally with offsets, or only ones with payloads."""
func = index.MultiFields.getTermPositionsEnum if LU7 else index.MultiTerms.getTermPostingsEnum
docsenum = func(self.indexReader, name, util.BytesRef(value))
-for doc in (iter(docsenum.nextDoc, index.PostingsEnum.NO_MORE_DOCS) if docsenum else ()):
+for doc in iter(docsenum.nextDoc, index.PostingsEnum.NO_MORE_DOCS) if docsenum else ():
positions = (docsenum.nextPosition() for _ in range(docsenum.freq()))
if payloads:
positions = ((position, docsenum.payload.utf8ToString()) for position in positions if docsenum.payload)
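The hunk above, and the matching ones in documents.py and queries.py, come from black removing parentheses it considers redundant, here around the conditional expression in `for ... in` and around an `if` test; conversely, a bare one-element tuple gains explicit parentheses (`values = values,` becomes `values = (values,)` below). A sketch of both behaviors, stated from observed black output rather than the project's docs:

import black

src = "if (stored or indexed or dimensions):\n    pass\nvalues = values,\n"
print(black.format_str(src, mode=black.FileMode()), end='')
# prints:
# if stored or indexed or dimensions:
#     pass
# values = (values,)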
@@ -285,6 +287,7 @@ class IndexSearcher(search.IndexSearcher, IndexReader):
:param directory: directory path, lucene Directory, or lucene IndexReader
:param analyzer: lucene Analyzer, default StandardAnalyzer
"""
+
def __init__(self, directory, analyzer=None):
self.shared = closing()
search.IndexSearcher.__init__(self, self.shared.reader(directory))
@@ -472,6 +475,7 @@ class MultiSearcher(IndexSearcher):
:param reader: directory paths, Directories, IndexReaders, or a single MultiReader
:param analyzer: lucene Analyzer, default StandardAnalyzer
"""
+
def __init__(self, reader, analyzer=None):
IndexSearcher.__init__(self, reader, analyzer)
self.indexReaders = [index.DirectoryReader.cast_(context.reader()) for context in self.context.children()]
@@ -501,6 +505,7 @@ class IndexWriter(index.IndexWriter):
:param version: lucene Version argument passed to IndexWriterConfig, default is latest
:param attrs: additional attributes to set on IndexWriterConfig
"""
+
parse = IndexSearcher.__dict__['parse']

def __init__(self, directory=None, mode='a', analyzer=None, version=None, **attrs):
@@ -546,7 +551,7 @@ def document(self, items=(), **terms):
doc = document.Document()
for name, values in dict(items, **terms).items():
if isinstance(values, Atomic):
-values = values,
+values = (values,)
for field in self.fields[name].items(*values):
doc.add(field)
return doc
@@ -614,6 +619,7 @@ class Indexer(IndexWriter):
:param nrt: optionally use a near real-time searcher
"""
+
def __init__(self, directory=None, mode='a', analyzer=None, version=None, nrt=False, **attrs):
IndexWriter.__init__(self, directory, mode, analyzer, version, **attrs)
IndexWriter.commit(self)
9 changes: 6 additions & 3 deletions lupyne/engine/queries.py
@@ -14,6 +14,7 @@ class Query(object):
Uses class methods and operator overloading for convenient query construction.
"""
+
def __new__(cls, base, *args):
return base.__new__(type(base.__name__, (cls, base), {}))

@@ -36,7 +37,7 @@ def boolean(cls, occur, *queries, **terms):
for query in queries:
builder.add(query, occur)
for name, values in terms.items():
-for value in ([values] if isinstance(values, string_types) else values):
+for value in [values] if isinstance(values, string_types) else values:
builder.add(cls.term(name, value), occur)
return builder.build()

@@ -58,8 +59,7 @@ def filter(cls, *queries, **terms):
@classmethod
def disjunct(cls, multiplier, *queries, **terms):
"""Return lucene DisjunctionMaxQuery from queries and terms."""
-terms = tuple(cls.term(name, value) for name, values in terms.items()
-              for value in ([values] if isinstance(values, string_types) else values))
+terms = tuple(cls.term(name, value) for name, values in terms.items() for value in ([values] if isinstance(values, string_types) else values))
return cls(search.DisjunctionMaxQuery, Arrays.asList(queries + terms), multiplier)

@classmethod
@@ -207,6 +207,7 @@ def __rsub__(self, other):

class SpanQuery(Query):
"""Inherited lucene SpanQuery with additional span constructors."""
+
def __getitem__(self, slc):
start, stop, step = slc.indices(Integer.MAX_VALUE)
assert step == 1, 'slice step is not supported'
@@ -246,6 +247,7 @@ def within(self, other):

class DocValues:
"""DocValues with type conversion."""
+
class Sorted(object):
def __init__(self, docvalues, size, type):
self.docvalues, self.size, self.type = docvalues, size, type
@@ -285,6 +287,7 @@ class SpellParser(PythonQueryParser):
Assign a searcher attribute or override :meth:`correct` implementation.
"""
+
def suggest(self, term):
"""Return term with text replaced as necessary."""
field = term.field()