Skip to content

Commit

Permalink
Spatial module replaced with LatLon fields.
Browse files Browse the repository at this point in the history
  • Loading branch information
coady committed Dec 28, 2017
1 parent 28beec9 commit 591a7cc
Show file tree
Hide file tree
Showing 10 changed files with 55 additions and 287 deletions.
3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ clean:
rm -rf dist lupyne.egg-info

html:
make -C docs $@ SPHINXOPTS=-W SPHINXBUILD=sphinx-build
make -C docs $@ SPHINXOPTS=-W
rst2$@.py README.rst docs/_build/README.$@
python3 -m examples.spatial > docs/_build/spatial.kml

dist: html
python3 setup.py sdist bdist_wheel
Expand Down
32 changes: 7 additions & 25 deletions docs/engine.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ engine

* `TokenFilter`_, `Analyzer`_
* `IndexSearcher`_, `MultiSearcher`_, `IndexWriter`_, `Indexer`_
* `Document`_, `Field`_, `NestedField`_, `NumericField`_, `DateTimeField`_
* `Document`_, `Field`_, `NestedField`_, `NumericField`_, `DateTimeField`_, `SpatialField`_
* `Query`_
* `PointField`_


analyzers
Expand Down Expand Up @@ -166,6 +165,12 @@ DateTimeField
:show-inheritance:
:members:

SpatialField
^^^^^^^^^^^^^
.. autoclass:: SpatialField
:show-inheritance:
:members:


queries
---------
Expand Down Expand Up @@ -204,26 +209,3 @@ SpellParser
.. attribute:: searcher

`IndexSearcher`_


spatial
---------
.. automodule:: lupyne.engine.spatial

Point
^^^^^^^^^^^^^
.. autoclass:: Point
:show-inheritance:
:members:

Tile
^^^^^^^^^^^^^
.. autoclass:: Tile
:show-inheritance:
:members:

PointField
^^^^^^^^^^^^^
.. autoclass:: PointField
:show-inheritance:
:members:
4 changes: 0 additions & 4 deletions docs/examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,3 @@ grouping
server
---------
.. literalinclude:: ../examples/server.py

spatial
---------
.. literalinclude:: ../examples/spatial.py
14 changes: 6 additions & 8 deletions examples/searching.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,20 @@
The general solution is to index the term values into a prefix tree.
Then each query can expand to only values of the appropriate granularity.
Lucene's NumericFields encode numbers to be sortable, so it is also able to cluster prefixes into the same field.
Lucene's Point fields encode numbers to be sortable, so they are also able to cluster prefixes into the same field.
Whereas Lupyne's NestedField assumes the value is already a sortable string, so different fields must be used to cluster the prefixes.
There are trade-offs to each approach:
* NumericFields support range queries natively, but must translate prefix queries.
* Point fields support range queries natively, but must translate prefix queries.
* NestedFields support prefix queries optimally, but must translate range queries.
* NumericFields only support numbers, and result in unreadable values in the index.
* Point fields only support numbers, and result in unreadable values in the index.
* NestedFields support any searchable values, but pollute the field namespace.
Lupyne PointFields and DateTimeFields are implemented as NumericFields since both are easily encoded as numbers.
Lupyne SpatialFields and DateTimeFields are implemented as Lucene Point fields.
NestedFields could still be used however, as demonstrated on dates below.
"""

from datetime import date
import lucene
from org.apache.lucene import search
from lupyne import engine
assert lucene.getVMEnv() or lucene.initVM()

Expand All @@ -40,7 +39,7 @@
indexer.set('incorporated', engine.DateTimeField)
indexer.set('year-month-day', engine.NestedField, sep='-')
indexer.set('population', engine.NumericField)
indexer.set('point', engine.PointField, precision=10)
indexer.set('point', engine.SpatialField)
# assigned fields can have a different key from their underlying field name
indexer.fields['location'] = engine.NestedField('state.city')

Expand Down Expand Up @@ -69,9 +68,8 @@
assert [hit['city'] for hit in indexer.search(query)] == ['San Francisco', 'Portland']

cities = ['San Francisco', 'Los Angeles', 'Portland']
for index, distance in enumerate([1e3, 1e5, 2e5, 1e6]):
for index, distance in enumerate([1e3, 1e5, 7e5, 1e6]):
query = indexer.fields['point'].within(-122.4, 37.7, distance=distance)
assert isinstance(query, search.BooleanQuery) and len(list(query)) <= 4
assert {hit['city'] for hit in indexer.search(query)} == set(cities[:index])

query = indexer.fields['location'].prefix('CA.San')
Expand Down
76 changes: 0 additions & 76 deletions examples/spatial.py

This file was deleted.

3 changes: 1 addition & 2 deletions lupyne/engine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@
import lucene # flake8: noqa
from .analyzers import Analyzer, TokenFilter
from .queries import Query
from .documents import Document, Field, NestedField, NumericField, DateTimeField
from .documents import Document, Field, NestedField, NumericField, DateTimeField, SpatialField
from .indexers import IndexSearcher, MultiSearcher, IndexWriter, Indexer
from .spatial import PointField

version = tuple(map(int, lucene.VERSION.split('.')))
assert version >= (6,), version
23 changes: 23 additions & 0 deletions lupyne/engine/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,29 @@ def within(self, days=0, weeks=0, utc=True, **delta):
return self.duration(date, days, weeks=weeks, **delta)


class SpatialField(NumericField):
    """Geospatial points, indexed with optional docvalues.

    Points are passed to this class in (lng, lat) order, whereas the lucene
    LatLon factories are called with latitude first; every method below swaps
    the two arguments when delegating.
    """

    def items(self, *points):
        """Generate lucene LatLon fields from points (lng, lat).

        Yields one ``LatLonPoint`` per point and then, only when
        ``self.docvalues`` is set, one ``LatLonDocValuesField`` per point.

        :param points: (lng, lat) pairs
        """
        for lng, lat in points:
            yield document.LatLonPoint(self.name, lat, lng)
        if self.docvalues:
            for lng, lat in points:
                yield document.LatLonDocValuesField(self.name, lat, lng)

    def within(self, lng, lat, distance):
        """Return a distance query for points within the given radius of a point.

        :param lng,lat: point
        :param distance: search radius in meters
        """
        return document.LatLonPoint.newDistanceQuery(self.name, lat, lng, distance)

    def distances(self, lng, lat):
        """Return distance SortField relative to the given point.

        :param lng,lat: point

        NOTE(review): the sort is built from ``LatLonDocValuesField``, so it
        presumably requires the field to have been indexed with docvalues
        enabled -- confirm against the lucene documentation.
        """
        return document.LatLonDocValuesField.newDistanceSort(self.name, lat, lng)


class Document(dict):
"""Multimapping of field names to values, but default getters return the first value."""
def __init__(self, doc):
Expand Down
6 changes: 0 additions & 6 deletions lupyne/engine/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from .analyzers import Analyzer
from .queries import suppress, Query, DocValues, SpellParser
from .documents import Field, Document, Hits, GroupingSearch
from .spatial import Distances
from ..utils import long, Atomic, SpellChecker

for cls in (analysis.TokenStream, lucene.JArray_byte):
Expand Down Expand Up @@ -183,11 +182,6 @@ def docvalues(self, name, type=None):
method = getattr(index.MultiDocValues, 'get{}Values'.format(docValuesType))
return getattr(DocValues, docValuesType)(method(self.indexReader, name), len(self), type)

def distances(self, lng, lat, lngfield, latfield):
    """Return distance calculator from given point and DocValue lng/lat fields.

    :param lng,lat: point to measure from
    :param lngfield,latfield: names of the docvalues fields read as floats
    """
    # Load the longitude values before the latitude values, as float docvalues.
    lng_values = self.docvalues(lngfield, float)
    lat_values = self.docvalues(latfield, float)
    return Distances(lng, lat, lng_values, lat_values)

def copy(self, dest, query=None, exclude=None, merge=0):
"""Copy the index to the destination directory.
Expand Down
137 changes: 0 additions & 137 deletions lupyne/engine/spatial.py

This file was deleted.

0 comments on commit 591a7cc

Please sign in to comment.