Skip to content

Commit

Permalink
Fix indexing of long terms (longer than Xapian's 245-byte term length limit).
Browse files Browse the repository at this point in the history
  • Loading branch information
alexsilva committed Jan 25, 2019
1 parent 06b7473 commit a53523d
Showing 1 changed file with 21 additions and 3 deletions.
24 changes: 21 additions & 3 deletions xapian_backend.py
Expand Up @@ -89,6 +89,8 @@ class NotSupportedError(Exception):
# Distance (in term positions) added between separate texts that are
# indexed with positional information.
TERMPOS_DISTANCE = 100
# Maximum length, as measured by len(), of a single word posted as a term.
# Longer words are split into consecutive chunks of this size to work
# around Xapian's term length limit.
# NOTE(review): presumably kept below Xapian's actual limit to leave room
# for the term prefix -- confirm.
TERM_LENGTH_LIMIT = 200


class InvalidIndexError(HaystackError):
"""Raised when an index can not be opened."""
Expand Down Expand Up @@ -375,11 +377,27 @@ def _add_literal_text(termpos, text, weight, prefix=''):
The sentence is bounded by "^" "$" to allow exact matches.
"""
def add_posting_word(value):
term = '%s%s' % (prefix, value)
document.add_posting(term, termpos, weight)

text = '^ %s $' % text
for word in text.split():
term = '%s%s' % (prefix, word)
document.add_posting(term, termpos, weight)
termpos += 1
# https://trac.xapian.org/wiki/FAQ/UniqueIds#Workingroundthetermlengthlimit
# Working round the term length limit
word = force_str(word)
word_length = len(word)
if word_length > TERM_LENGTH_LIMIT:
start, step = 0, TERM_LENGTH_LIMIT
while start < word_length:
posting_word = word[start: step]
add_posting_word(posting_word)
termpos += 1
start = step
step += TERM_LENGTH_LIMIT
else:
add_posting_word(word)
termpos += 1
termpos += TERMPOS_DISTANCE
return termpos

Expand Down

0 comments on commit a53523d

Please sign in to comment.