From 419f8077ae0f3a86985ada77426b26ebb8a7ae64 Mon Sep 17 00:00:00 2001 From: Robert Stepanek Date: Tue, 16 Apr 2024 12:12:51 +0200 Subject: [PATCH] xapian_wrap: ignore punct-only terms only in unstructured search parts This fixes a regression in 0c282379154a776099a83f120311a973c628eb55 Punctuation-only terms should only be ignored in unstructured search parts, such as bodies and subjects. --- .../JMAPEmail/email_query_punct_no_text | 56 +++++++++++++++++++ imap/xapian_wrap.cpp | 36 +++++++----- 2 files changed, 79 insertions(+), 13 deletions(-) create mode 100644 cassandane/tiny-tests/JMAPEmail/email_query_punct_no_text diff --git a/cassandane/tiny-tests/JMAPEmail/email_query_punct_no_text b/cassandane/tiny-tests/JMAPEmail/email_query_punct_no_text new file mode 100644 index 0000000000..f6c65b7c27 --- /dev/null +++ b/cassandane/tiny-tests/JMAPEmail/email_query_punct_no_text @@ -0,0 +1,56 @@ +#!perl +use Cassandane::Tiny; + +sub test_email_query_punct_no_text + :needs_component_sieve :needs_component_jmap :JMAPExtensions +{ + my ($self) = @_; + my $imap = $self->{store}->get_client(); + + $imap->create("matches") or die; + + # Assert that punctuation-only terms in non-text criteria + # match nothing. Also see email_query_utf8punct_term. + + $self->{instance}->install_sieve_script(<<'EOF' +require ["x-cyrus-jmapquery", "x-cyrus-log", "variables", "fileinto"]; +# Search: "from:\"=\"" +if allof( + not string :is "${stop}" "Y", + jmapquery text: + { + "conditions" : [ + { + "from" : "\"=\"" + } + ], + "operator" : "OR" + } +. 
+) { + fileinto "matches"; + set "stop" "Y"; +} +EOF + ); + + my $mime = <<'EOF'; +From: from@local +To: to@local +Subject: test +Date: Mon, 13 Apr 2020 15:34:03 +0200 +MIME-Version: 1.0 +Content-Type: text/plain;charset=us-ascii +Content-Transfer-Encoding: 7bit + +hello +EOF + $mime =~ s/\r?\n/\r\n/gs; + my $msg = Cassandane::Message->new(); + $msg->set_lines(split /\n/, $mime); + $self->{instance}->deliver($msg); + + xlog "Assert that message did not match"; + $self->assert_num_equals(0, $imap->message_count('matches')); + $self->assert_num_equals(1, $imap->message_count('INBOX')); +} diff --git a/imap/xapian_wrap.cpp b/imap/xapian_wrap.cpp index df1c8c4b50..5911a7e914 100644 --- a/imap/xapian_wrap.cpp +++ b/imap/xapian_wrap.cpp @@ -1945,18 +1945,33 @@ xapian_query_new_match_internal(const xapian_db_t *db, int partnum, const char * return query_new_type(db, prefix, str); } - // Don't stem queries for Thaana codepage (0780) or higher. + // Match unstructured search parts + + static Xapian::Query *q = NULL; + + int need_word_break = 0; for (const unsigned char *p = (const unsigned char *)str; *p; p++) { - if (*p > 221) //has highbit - return xapian_query_new_match_word_break(db, str, prefix); + // Use ICU word break for Thaana codepage (0780) or higher. + if (*p > 221) { + need_word_break = 1; + break; + } } - // Stemable codepage. 
- Xapian::TermGenerator::stem_strategy stem_strategy = - get_stem_strategy(XAPIAN_DB_CURRENT_VERSION, partnum); - - return query_new_textmatch(db, str, prefix, stem_strategy); + if (need_word_break) { + q = xapian_query_new_match_word_break(db, str, prefix); + } + else { + Xapian::TermGenerator::stem_strategy stem_strategy = + get_stem_strategy(XAPIAN_DB_CURRENT_VERSION, partnum); + q = query_new_textmatch(db, str, prefix, stem_strategy); + } + if (q && q->get_type() == Xapian::Query::LEAF_MATCH_NOTHING) { + delete q; + q = NULL; + } + return q; } catch (const Xapian::Error &err) { xsyslog(LOG_ERR, "IOERROR: caught exception", "exception=<%s>", @@ -2004,11 +2019,6 @@ xapian_query_new_match(const xapian_db_t *db, int partnum, const char *str) charset_free(&utf8); } - if (q && q->get_type() == Xapian::Query::LEAF_MATCH_NOTHING) { - delete q; - q = NULL; - } - return (xapian_query_t*) q; }