Skip to content

Commit

Permalink
xapian_wrap: ignore punct-only terms only in unstructured search parts
Browse files Browse the repository at this point in the history
This fixes a regression in 0c28237
Punctuation-only terms should only be ignored in unstructured search
parts, such as bodies and subjects.
  • Loading branch information
rsto committed Apr 16, 2024
1 parent 0425439 commit 419f807
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 13 deletions.
56 changes: 56 additions & 0 deletions cassandane/tiny-tests/JMAPEmail/email_query_punct_no_text
@@ -0,0 +1,56 @@
#!perl
use Cassandane::Tiny;

# Regression test: a punctuation-only term in a structured (non-text)
# criterion must not match a message.
#
# The test installs a Sieve script whose jmapquery filter searches the
# "from" criterion for the punctuation-only phrase "=" and files any
# match into the "matches" mailbox. It then delivers one plain-text
# message and asserts that the message stayed in INBOX and that
# "matches" is empty.
sub test_email_query_punct_no_text
    :needs_component_sieve :needs_component_jmap :JMAPExtensions
{
    my ($self) = @_;
    my $imap = $self->{store}->get_client();

    $imap->create("matches")
        or die "Cannot create mailbox 'matches'";

    # Assert that punctuation-only terms in non-text criteria
    # match nothing. Also see email_query_utf8punct_term.

    # The heredoc payload is kept at column 0 on purpose: the "." that
    # terminates the Sieve multi-line "text:" string must start its line.
    $self->{instance}->install_sieve_script(<<'EOF'
require ["x-cyrus-jmapquery", "x-cyrus-log", "variables", "fileinto"];
# Search: "from:\"=\""
if allof(
not string :is "${stop}" "Y",
jmapquery text:
{
"conditions" : [
{
"from" : "\"=\""
}
],
"operator" : "OR"
}
.
) {
fileinto "matches";
set "stop" "Y";
}
EOF
    );

    # The empty line after the last header separates the header section
    # from the body; without it "hello" is not parsed as the body.
    my $mime = <<'EOF';
From: from@local
To: to@local
Subject: test
Date: Mon, 13 Apr 2020 15:34:03 +0200
MIME-Version: 1.0
Content-Type: text/plain;charset=us-ascii
Content-Transfer-Encoding: 7bit

hello
EOF
    # Normalize line endings to CRLF before handing the lines over.
    $mime =~ s/\r?\n/\r\n/gs;
    my $msg = Cassandane::Message->new();
    $msg->set_lines(split /\n/, $mime);
    $self->{instance}->deliver($msg);

    xlog "Assert that message did not match";
    $self->assert_num_equals(0, $imap->message_count('matches'));
    $self->assert_num_equals(1, $imap->message_count('INBOX'));
}
36 changes: 23 additions & 13 deletions imap/xapian_wrap.cpp
Expand Up @@ -1945,18 +1945,33 @@ xapian_query_new_match_internal(const xapian_db_t *db, int partnum, const char *
return query_new_type(db, prefix, str);
}

// Don't stem queries for Thaana codepage (0780) or higher.
// Match unstructured search parts

static Xapian::Query *q = NULL;

int need_word_break = 0;
for (const unsigned char *p = (const unsigned char *)str; *p; p++) {
if (*p > 221) //has highbit
return xapian_query_new_match_word_break(db, str, prefix);
// Use ICU word break for Thaana codepage (0780) or higher.
if (*p > 221) {
need_word_break = 1;
break;
}
}

// Stemable codepage.
Xapian::TermGenerator::stem_strategy stem_strategy =
get_stem_strategy(XAPIAN_DB_CURRENT_VERSION, partnum);

return query_new_textmatch(db, str, prefix, stem_strategy);
if (need_word_break) {
q = xapian_query_new_match_word_break(db, str, prefix);
}
else {
Xapian::TermGenerator::stem_strategy stem_strategy =
get_stem_strategy(XAPIAN_DB_CURRENT_VERSION, partnum);
q = query_new_textmatch(db, str, prefix, stem_strategy);
}
if (q && q->get_type() == Xapian::Query::LEAF_MATCH_NOTHING) {
delete q;
q = NULL;
}

return q;
} catch (const Xapian::Error &err) {
xsyslog(LOG_ERR, "IOERROR: caught exception",
"exception=<%s>",
Expand Down Expand Up @@ -2004,11 +2019,6 @@ xapian_query_new_match(const xapian_db_t *db, int partnum, const char *str)
charset_free(&utf8);
}

if (q && q->get_type() == Xapian::Query::LEAF_MATCH_NOTHING) {
delete q;
q = NULL;
}

return (xapian_query_t*) q;
}

Expand Down

0 comments on commit 419f807

Please sign in to comment.