Skip to content

Commit

Permalink
lib-fts: tokenizer-generic - recognise request for explicit prefix searching
Browse files Browse the repository at this point in the history

Just store a flag in the tokenizer when the setting is seen, nothing more.

Signed-off-by: Phil Carmody <phil@dovecot.fi>
  • Loading branch information
Phil Carmody authored and villesavolainen committed Feb 12, 2019
1 parent 3a35b9c commit 1bc83fe
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/lib-fts/fts-tokenizer-generic-private.h
Expand Up @@ -40,6 +40,7 @@ enum boundary_algorithm {
struct generic_fts_tokenizer {
struct fts_tokenizer tokenizer;
unsigned int max_length;
bool prefixsplat; /* for search strings, accept a trailing '*' for explicit prefix */
bool wb5a; /* TR29 rule for prefix separation
in e.g. French or Italian. */
bool seen_wb5a;
Expand Down
10 changes: 10 additions & 0 deletions src/lib-fts/fts-tokenizer-generic.c
Expand Up @@ -36,6 +36,8 @@ fts_tokenizer_generic_create(const char *const *settings,
unsigned int max_length = FTS_DEFAULT_TOKEN_MAX_LENGTH;
enum boundary_algorithm algo = BOUNDARY_ALGORITHM_SIMPLE;
bool wb5a = FALSE;
bool search = FALSE;
bool explicitprefix = FALSE;
unsigned int i;

for (i = 0; settings[i] != NULL; i += 2) {
Expand All @@ -61,17 +63,24 @@ fts_tokenizer_generic_create(const char *const *settings,
} else if (strcmp(key, "search") == 0) {
/* tokenizing a search string -
makes no difference to us */
search = TRUE;
} else if (strcasecmp(key, "wb5a") == 0) {
if (strcasecmp(value, "no") == 0)
wb5a = FALSE;
else
wb5a = TRUE;
} else if (strcasecmp(key, "explicitprefix") == 0) {
explicitprefix = TRUE;
} else {
*error_r = t_strdup_printf("Unknown setting: %s", key);
return -1;
}
}

/* Tokenise normally unless tokenising an explicit prefix query */
if (!search)
explicitprefix = FALSE;

if (wb5a && algo != BOUNDARY_ALGORITHM_TR29) {
*error_r = "Can not use WB5a for algorithms other than TR29.";
return -1;
Expand All @@ -85,6 +94,7 @@ fts_tokenizer_generic_create(const char *const *settings,
tok->max_length = max_length;
tok->algorithm = algo;
tok->wb5a = wb5a;
tok->prefixsplat = explicitprefix;
tok->token = buffer_create_dynamic(default_pool, 64);

*tokenizer_r = &tok->tokenizer;
Expand Down

0 comments on commit 1bc83fe

Please sign in to comment.