Skip to content

Commit

Permalink
cql3: Demand ALLOW FILTERING for unlimited, sliced partitions
Browse files Browse the repository at this point in the history
When a query requests a partition slice but doesn't limit the number
of partitions, require that it also says ALLOW FILTERING.  Although
do_filter() isn't invoked for such queries, the performance can still
be unexpectedly slow, and we want to signal that to the user by
demanding they explicitly say ALLOW FILTERING.

Because we now reject queries that worked fine before, existing
applications can break.  Therefore, the behavior is controlled by a
flag currently defaulting to off.  We will default to "on" in the next
Scylla version.

Fixes scylladb#7608; see comments there for justification.

Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
  • Loading branch information
Dejan Mircevski committed Aug 8, 2021
1 parent ba55769 commit ffbe9b8
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 10 deletions.
5 changes: 4 additions & 1 deletion cql3/statements/raw/select_statement.hh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "cql3/statements/prepared_statement.hh"
#include "cql3/relation.hh"
#include "cql3/attributes.hh"
#include "db/config.hh"
#include <seastar/core/shared_ptr.hh>

namespace cql3 {
Expand Down Expand Up @@ -150,7 +151,9 @@ private:
bool is_reversed(const schema& schema) const;

/** If ALLOW FILTERING was not specified, this verifies that it is not needed */
void check_needs_filtering(const restrictions::statement_restrictions& restrictions);
void check_needs_filtering(
const restrictions::statement_restrictions& restrictions,
db::tri_mode_restriction_t::mode strict_allow_filtering);

void ensure_filtering_columns_retrieval(database& db,
selection::selection& selection,
Expand Down
38 changes: 32 additions & 6 deletions cql3/statements/select_statement.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@

bool is_system_keyspace(std::string_view name);

static logging::logger mylog("select_statement");

namespace cql3 {

namespace statements {
Expand Down Expand Up @@ -1413,7 +1415,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(database& db, cql_
is_reversed_ = is_reversed(*schema);
}

check_needs_filtering(*restrictions);
check_needs_filtering(*restrictions, db.get_config().strict_allow_filtering());
ensure_filtering_columns_retrieval(db, *selection, *restrictions);
auto group_by_cell_indices = ::make_shared<std::vector<size_t>>(prepare_group_by(*schema, *selection));

Expand Down Expand Up @@ -1627,15 +1629,39 @@ bool select_statement::is_reversed(const schema& schema) const {
return is_reversed_;
}

/// True iff restrictions require ALLOW FILTERING despite there being no coordinator-side filtering.
static bool needs_allow_filtering_anyway(
const restrictions::statement_restrictions& restrictions,
db::tri_mode_restriction_t::mode strict_allow_filtering) {
using flag_t = db::tri_mode_restriction_t::mode;
if (strict_allow_filtering == flag_t::FALSE) {
return false;
}
const auto& ck_restrictions = *restrictions.get_clustering_columns_restrictions();
const auto& pk_restrictions = *restrictions.get_partition_key_restrictions();
// Even if no filtering happens on the coordinator, we still warn about poor performance when partition
// slice is defined but in potentially unlimited number of partitions (see #7608).
if ((pk_restrictions.empty() || has_token(pk_restrictions.expression)) // Potentially unlimited partitions.
&& !ck_restrictions.empty() // Slice defined.
&& !restrictions.uses_secondary_indexing()) { // Base-table is used. (Index-table use always limits partitions.)
if (strict_allow_filtering == flag_t::WARN) {
mylog.warn("This query should use ALLOW FILTERING and will be rejected in future versions: {}",
restrictions.to_string());
return false;
}
return true;
}
return false;
}

/** If ALLOW FILTERING was not specified, this verifies that it is not needed */
void select_statement::check_needs_filtering(const restrictions::statement_restrictions& restrictions)
void select_statement::check_needs_filtering(
const restrictions::statement_restrictions& restrictions,
db::tri_mode_restriction_t::mode strict_allow_filtering)
{
// non-key-range non-indexed queries cannot involve filtering underneath
if (!_parameters->allow_filtering() && (restrictions.is_key_range() || restrictions.uses_secondary_indexing())) {
// We will potentially filter data if either:
// - Have more than one IndexExpression
// - Have no index expression and the column filter is not the identity
if (restrictions.need_filtering()) {
if (restrictions.need_filtering() || needs_allow_filtering_anyway(restrictions, strict_allow_filtering)) {
throw exceptions::invalid_request_exception(
"Cannot execute this query as it might involve data filtering and "
"thus may have unpredictable performance. If you want to execute "
Expand Down
5 changes: 5 additions & 0 deletions db/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ class convert<enum_option<db::tri_mode_restriction_t>> {
#define str(x) #x
#define _mk_init(name, type, deflt, status, desc, ...) , name(this, str(name), value_status::status, type(deflt), desc)

static db::tri_mode_restriction_t::mode strict_allow_filtering_default() {
return db::tri_mode_restriction_t::mode::WARN; // TODO: make it TRUE after Scylla 4.6.
}

db::config::config(std::shared_ptr<db::extensions> exts)
: utils::config_file()

Expand Down Expand Up @@ -802,6 +806,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"Maximum number of concurrent requests a single shard can handle before it starts shedding extra load. By default, no requests will be shed.")
, cdc_dont_rewrite_streams(this, "cdc_dont_rewrite_streams", value_status::Used, false,
"Disable rewriting streams from cdc_streams_descriptions to cdc_streams_descriptions_v2. Should not be necessary, but the procedure is expensive and prone to failures; this config option is left as a backdoor in case some user requires manual intervention.")
, strict_allow_filtering(this, "strict_allow_filtering", value_status::Used, strict_allow_filtering_default(), "Match Cassandra in requiring ALLOW FILTERING on slow queries. Can be true, false, or warn. When false, Scylla accepts some slow queries even without ALLOW FILTERING that Cassandra rejects. Warn is same as false, but with warning.")
, alternator_port(this, "alternator_port", value_status::Used, 0, "Alternator API port")
, alternator_https_port(this, "alternator_https_port", value_status::Used, 0, "Alternator API HTTPS port")
, alternator_address(this, "alternator_address", value_status::Used, "0.0.0.0", "Alternator API listening address")
Expand Down
1 change: 1 addition & 0 deletions db/config.hh
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ public:
named_value<uint32_t> schema_registry_grace_period;
named_value<uint32_t> max_concurrent_requests_per_shard;
named_value<bool> cdc_dont_rewrite_streams;
named_value<tri_mode_restriction> strict_allow_filtering;

named_value<uint16_t> alternator_port;
named_value<uint16_t> alternator_https_port;
Expand Down
34 changes: 34 additions & 0 deletions test/boost/restrictions_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include "cql3/cql_config.hh"
#include "cql3/values.hh"
#include "db/config.hh"
#include "test/lib/cql_assertions.hh"
#include "test/lib/cql_test_env.hh"
#include "test/lib/exception_utils.hh"
Expand Down Expand Up @@ -927,3 +928,36 @@ SEASTAR_THREAD_TEST_CASE(tuples) {
require_rows(e, "select q from t where q>(99) allow filtering", {});
}).get();
}

SEASTAR_THREAD_TEST_CASE(strict_allow_filtering) {
auto cfg = make_shared<db::config>();
cfg->strict_allow_filtering(db::tri_mode_restriction_t::mode::TRUE);
do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(e, "create table t (p int, c int, primary key (p,c))");
require_rows(e, "select p from t where c>0 allow filtering", {});
BOOST_REQUIRE_EXCEPTION(
e.execute_cql("select p from t where c>0").get(),
exceptions::invalid_request_exception,
exception_predicate::message_contains("use ALLOW FILTERING"));
}, cfg).get();
}

SEASTAR_THREAD_TEST_CASE(nonstrict_allow_filtering) {
auto cfg = make_shared<db::config>();
cfg->strict_allow_filtering(db::tri_mode_restriction_t::mode::FALSE);
do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(e, "create table t (p int, c int, primary key (p,c))");
require_rows(e, "select p from t where c>0 allow filtering", {});
require_rows(e, "select p from t where c>0", {});
}, cfg).get();
}

SEASTAR_THREAD_TEST_CASE(warn_strict_allow_filtering) {
auto cfg = make_shared<db::config>();
cfg->strict_allow_filtering(db::tri_mode_restriction_t::mode::WARN);
do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(e, "create table t (p int, c int, primary key (p,c))");
require_rows(e, "select p from t where c>0 allow filtering", {});
require_rows(e, "select p from t where c>0", {});
}, cfg).get();
}
9 changes: 6 additions & 3 deletions test/cql-pytest/test_allow_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ def test_allow_filtering_regular_column(cql, table1):
# partition it needs to inspect the promoted index and in the worst case,
# also read a chunk of rows to look for the matching clustering-column value.
# Reproduces issue #7608.
@pytest.mark.xfail(reason="issue #7608")
def test_allow_filtering_clustering_key(cql, table1):
check_af_mandatory(cql, table1, 'c=2', lambda row: row.c==2)
check_af_mandatory(cql, table1, 'c>2', lambda row: row.c>2)
Expand All @@ -136,7 +135,6 @@ def test_allow_filtering_clustering_key(cql, table1):
# ranges - because although they are finite, they may still contain a lot
# of partitions.
# Reproduces issue #7608.
@pytest.mark.xfail(reason="issue #7608")
def test_allow_filtering_clustering_key_token_range(cql, table1):
# TODO: the current implementation of check_af_mandatory() doesn't know
# how to request or compare results with the token, so we pass None to
Expand All @@ -153,7 +151,6 @@ def test_allow_filtering_clustering_key_token_range(cql, table1):
# this case. Note that 'c=2 AND k = 123' (without the "token"), which
# is guaranteed to match just one partition, doesn't need ALLOW FILTERING.
# Reproduces issue #7608.
@pytest.mark.xfail(reason="issue #7608")
def test_allow_filtering_clustering_key_token_specific(cql, table1):
check_af_mandatory(cql, table1, 'c=2 AND token(k) = 123', None)
check_af_optional(cql, table1, 'c=2 AND k = 123', None)
Expand Down Expand Up @@ -237,6 +234,12 @@ def table3(cql, test_keyspace):
yield (table, everything)
cql.execute("DROP TABLE " + table)

def test_allow_filtering_multi_column(cql, table3):
"""Multi-column restrictions are just like other clustering restrictions"""
check_af_mandatory(cql, table3, '(c1,c2)=(10,10)', lambda row: row.c1==10 and row.c2==10)
check_af_optional(cql, table3, '(c1,c2)=(10,10) and k1=1 and k2=1',
lambda row: row.c1==10 and row.c2==10 and row.k1==1 and row.k2==1)

# In test_allow_filtering_indexed_a_and_k() above we noted that the
# combination of an indexed column and the partition key can be searched
# without filtering, and explained why. The same cannot be said for the
Expand Down

0 comments on commit ffbe9b8

Please sign in to comment.