From 6d292e2a1ce14195f7dc2045a5a8c3ef9d2d161a Mon Sep 17 00:00:00 2001
From: Phil Carmody
Date: Fri, 11 May 2018 13:34:40 +0300
Subject: [PATCH] lib-fts: generic simple tokeniser - distinguish "letters" from non-"letters"

prev_type is only compared against SINGLE_QUOTE, so there will be no
behavioural differences. However, maintaining the state that we've just
seen something we are prepared to search for (very loosely, a "letter")
rather than something that we threw away (word breaks) will be important
when it comes to explicit prefix query parsing.

Signed-off-by: Phil Carmody
---
 src/lib-fts/fts-tokenizer-generic.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/lib-fts/fts-tokenizer-generic.c b/src/lib-fts/fts-tokenizer-generic.c
index 0ac61dea38..1bcc8db4eb 100644
--- a/src/lib-fts/fts-tokenizer-generic.c
+++ b/src/lib-fts/fts-tokenizer-generic.c
@@ -255,7 +255,10 @@ fts_tokenizer_generic_simple_next(struct fts_tokenizer *_tok,
 			start = i + char_size;
 			shift_prev_type(tok, LETTER_TYPE_SINGLE_QUOTE);
 		} else {
-			shift_prev_type(tok, LETTER_TYPE_NONE);
+			/* Lie slightly about the type. This is anything that
+			   we're not skipping or cutting on and are prepared to
+			   search for - it's "as good as" a letter. */
+			shift_prev_type(tok, LETTER_TYPE_ALETTER);
 		}
 	}
 	/* word boundary not found yet */