diff --git a/src/plugins/fts/fts-api-private.h b/src/plugins/fts/fts-api-private.h
index e569143041..4fd12e9006 100644
--- a/src/plugins/fts/fts-api-private.h
+++ b/src/plugins/fts/fts-api-private.h
@@ -122,6 +122,8 @@ int fts_index_have_compatible_settings(struct mailbox_list *list,
 /* Returns TRUE if FTS backend should index the header for optimizing separate
    lookups */
 bool fts_header_want_indexed(const char *hdr_name);
+/* Returns TRUE if header's values should be considered to have a language. */
+bool fts_header_has_language(const char *hdr_name);
 
 int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r);
 
diff --git a/src/plugins/fts/fts-api.c b/src/plugins/fts/fts-api.c
index f598ccf9d8..66c09c900a 100644
--- a/src/plugins/fts/fts-api.c
+++ b/src/plugins/fts/fts-api.c
@@ -491,6 +491,25 @@ bool fts_header_want_indexed(const char *hdr_name)
 	return FALSE;
 }
 
+bool fts_header_has_language(const char *hdr_name)
+{
+	/* FIXME: should email address headers be detected as different
+	   languages? Those mainly contain people's names. */
+	/*if (message_header_is_address(hdr_name))
+		return TRUE;*/
+
+	/* Subject definitely contains language-specific data that can be
+	   detected. Comments and Keywords headers could also contain it,
+	   although just about nobody uses those headers.
+
+	   For now we assume that other headers contain non-language specific
+	   data that we don't want to filter in special ways. For example
+	   it is good to be able to search for Message-IDs. */
+	return strcasecmp(hdr_name, "Subject") == 0 ||
+		strcasecmp(hdr_name, "Comments") == 0 ||
+		strcasecmp(hdr_name, "Keywords") == 0;
+}
+
 int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r)
 {
 	struct mailbox_metadata metadata;
diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c
index 0ca251d027..4d1a34a7d8 100644
--- a/src/plugins/fts/fts-build-mail.c
+++ b/src/plugins/fts/fts-build-mail.c
@@ -67,25 +67,6 @@ fts_build_parse_content_disposition(struct fts_mail_build_context *ctx,
 		i_strndup(hdr->full_value, hdr->full_value_len);
 }
 
-static bool header_has_language(const char *name)
-{
-	/* FIXME: should email address headers be detected as different
-	   languages? That mainly contains people's names.. */
-	/*if (message_header_is_address(name))
-		return TRUE;*/
-
-	/* Subject definitely contains language-specific data that can be
-	   detected. Comment and Keywords headers also could contain, although
-	   just about nobody uses those headers.
-
-	   For now we assume that other headers contain non-language specific
-	   data that we don't want to filter in special ways. For example
-	   it is good to be able to search for Message-IDs. */
-	return strcasecmp(name, "Subject") == 0 ||
-		strcasecmp(name, "Comments") == 0 ||
-		strcasecmp(name, "Keywords") == 0;
-}
-
 static void fts_parse_mail_header(struct fts_mail_build_context *ctx,
 				  const struct message_block *raw_block)
 {
@@ -157,7 +138,7 @@ fts_build_tokenized_hdr_update_lang(struct fts_mail_build_context *ctx,
 	   human languages, so we have a list of some hardcoded header names
 	   and we'll also assume that if there's any 8bit content it's a human
 	   language. */
-	if (header_has_language(hdr->name) ||
+	if (fts_header_has_language(hdr->name) ||
 	    data_has_8bit(hdr->full_value, hdr->full_value_len))
 		ctx->cur_user_lang = NULL;
 	else {