From 6d292e2a1ce14195f7dc2045a5a8c3ef9d2d161a Mon Sep 17 00:00:00 2001
From: Phil Carmody <phil@dovecot.fi>
Date: Fri, 11 May 2018 13:34:40 +0300
Subject: [PATCH] lib-fts: generic simple tokeniser - distinguish "letters"
 from non-"letters"

prev_type is only compared against LETTER_TYPE_SINGLE_QUOTE, so this
change causes no behavioural difference. However, recording that we
have just seen something we are prepared to search for (very loosely,
a "letter"), rather than something that we threw away (a word break),
will be important when it comes to explicit prefix query parsing.

Signed-off-by: Phil Carmody <phil@dovecot.fi>
---
 src/lib-fts/fts-tokenizer-generic.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/lib-fts/fts-tokenizer-generic.c b/src/lib-fts/fts-tokenizer-generic.c
index 0ac61dea38..1bcc8db4eb 100644
--- a/src/lib-fts/fts-tokenizer-generic.c
+++ b/src/lib-fts/fts-tokenizer-generic.c
@@ -255,7 +255,10 @@ fts_tokenizer_generic_simple_next(struct fts_tokenizer *_tok,
 			start = i + char_size;
 			shift_prev_type(tok, LETTER_TYPE_SINGLE_QUOTE);
 		} else {
-			shift_prev_type(tok, LETTER_TYPE_NONE);
+			/* Lie slightly about the type. This is anything that
+			   we're not skipping or cutting on and are prepared to
+			   search for - it's "as good as" a letter. */
+			shift_prev_type(tok, LETTER_TYPE_ALETTER);
 		}
 	}
 	/* word boundary not found yet */