From 44d6a8df12cf827701a7a5546368db7f88892f04 Mon Sep 17 00:00:00 2001 From: Manuel Ung Date: Wed, 3 Feb 2021 14:08:33 -0800 Subject: [PATCH] Do not count delete markers during distinct key calculation in properties collector Summary: We should not be including delete markers when calculating number of distinct keys, since they are not visible. Also add an assert to ensure that distinctness is always less than the number of rows in that sst file. Reviewed By: yizhang82 Differential Revision: D26236419 fbshipit-source-id: 1b49419094f --- .../r/optimizer_loose_index_scans.result | 10 +++++----- .../t/optimizer_loose_index_scans-master.opt | 1 + storage/rocksdb/ha_rocksdb.cc | 5 +++++ storage/rocksdb/properties_collector.cc | 20 ++++++++++++++++--- storage/rocksdb/properties_collector.h | 2 +- 5 files changed, 29 insertions(+), 9 deletions(-) create mode 100644 mysql-test/suite/rocksdb/t/optimizer_loose_index_scans-master.opt diff --git a/mysql-test/suite/rocksdb/r/optimizer_loose_index_scans.result b/mysql-test/suite/rocksdb/r/optimizer_loose_index_scans.result index 442b7f6e73fc..c1bb3f57893c 100644 --- a/mysql-test/suite/rocksdb/r/optimizer_loose_index_scans.result +++ b/mysql-test/suite/rocksdb/r/optimizer_loose_index_scans.result @@ -5,12 +5,12 @@ Table Op Msg_type Msg_text test.t analyze status OK show indexes from t; Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment -t 0 PRIMARY 1 a A 100 NULL NULL LSMTREE -t 0 PRIMARY 2 b A 500 NULL NULL LSMTREE -t 0 PRIMARY 3 c A 2500 NULL NULL LSMTREE +t 0 PRIMARY 1 a A 10 NULL NULL LSMTREE +t 0 PRIMARY 2 b A 50 NULL NULL LSMTREE +t 0 PRIMARY 3 c A 250 NULL NULL LSMTREE t 0 PRIMARY 4 d A 2500 NULL NULL LSMTREE -t 1 b 1 b A 50 NULL NULL LSMTREE -t 1 b 2 d A 500 NULL NULL LSMTREE +t 1 b 1 b A 5 NULL NULL LSMTREE +t 1 b 2 d A 50 NULL NULL LSMTREE set optimizer_switch = 'skip_scan=off'; explain select b, d from t where d < 2; id select_type table type possible_keys key key_len ref rows Extra diff --git a/mysql-test/suite/rocksdb/t/optimizer_loose_index_scans-master.opt b/mysql-test/suite/rocksdb/t/optimizer_loose_index_scans-master.opt new file mode 100644 index 000000000000..436edf2b40c6 --- /dev/null +++ b/mysql-test/suite/rocksdb/t/optimizer_loose_index_scans-master.opt @@ -0,0 +1 @@ +--rocksdb_table_stats_sampling_pct=100 diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 24c194c6ba10..72d9d97808fb 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -12639,6 +12639,11 @@ static void adjust_cardinality( if ((uint64_t)stat.m_rows > max_num_rows_scanned) { stat.adjust_cardinality(stat.m_rows / max_num_rows_scanned); } +#ifndef DBUG_OFF + for (size_t i = 0; i < stat.m_distinct_keys_per_prefix.size(); i++) { + DBUG_ASSERT(stat.m_distinct_keys_per_prefix[i] <= stat.m_rows); + } +#endif } } diff --git a/storage/rocksdb/properties_collector.cc b/storage/rocksdb/properties_collector.cc index ebf721f97180..b1cbd3762f60 100644 --- a/storage/rocksdb/properties_collector.cc +++ b/storage/rocksdb/properties_collector.cc @@ -187,7 +187,7 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key, stats->m_actual_disk_size += file_size - m_file_size; m_file_size = file_size; - if (m_keydef != nullptr) { + if (m_keydef != nullptr && type == rocksdb::kEntryPut) { m_cardinality_collector.ProcessKey(key, m_keydef.get(), stats); } } @@ -239,6 +239,20 @@ rocksdb::Status Rdb_tbl_prop_coll::Finish( for (Rdb_index_stats &stat : m_stats) { m_cardinality_collector.SetCardinality(&stat); m_cardinality_collector.AdjustStats(&stat); + + // Adjust cardinality down if it exceeds total number of rows. + if (stat.m_distinct_keys_per_prefix.size()) { + int64_t max_distinct_keys = stat.m_distinct_keys_per_prefix.back(); + if (max_distinct_keys > stat.m_rows) { + stat.adjust_cardinality(static_cast(stat.m_rows) / + max_distinct_keys); + } + } +#ifndef DBUG_OFF + for (size_t i = 0; i < stat.m_distinct_keys_per_prefix.size(); i++) { + DBUG_ASSERT(stat.m_distinct_keys_per_prefix[i] <= stat.m_rows); + } +#endif } m_recorded = true; @@ -496,9 +510,9 @@ void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool increment, } } -void Rdb_index_stats::adjust_cardinality(uint64_t adjustment_factor) { +void Rdb_index_stats::adjust_cardinality(double adjustment_factor) { for (int64_t &num_keys : m_distinct_keys_per_prefix) { - num_keys = num_keys * adjustment_factor; + num_keys = MY_MAX(1, num_keys * adjustment_factor); } } diff --git a/storage/rocksdb/properties_collector.h b/storage/rocksdb/properties_collector.h index 4fc460b8b745..571a6945ab02 100644 --- a/storage/rocksdb/properties_collector.h +++ b/storage/rocksdb/properties_collector.h @@ -75,7 +75,7 @@ struct Rdb_index_stats { void merge(const Rdb_index_stats &s, const bool increment = true, const int64_t estimated_data_len = 0); - void adjust_cardinality(uint64_t adjustment_factor); + void adjust_cardinality(double adjustment_factor); void reset_cardinality(); };