Skip to content

Commit

Permalink
Do not count delete markers during distinct key calculation in proper…
Browse files Browse the repository at this point in the history
…ties collector

Summary: We should not be including delete markers when calculating number of distinct keys, since they are not visible. Also add an assert to ensure that distinctness is always less than the number of rows in that sst file.

Reviewed By: yizhang82

Differential Revision: D26236419

fbshipit-source-id: 1b49419094f
  • Loading branch information
lth authored and facebook-github-bot committed Feb 5, 2021
1 parent 25b42b5 commit 44d6a8d
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 9 deletions.
10 changes: 5 additions & 5 deletions mysql-test/suite/rocksdb/r/optimizer_loose_index_scans.result
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ Table Op Msg_type Msg_text
test.t analyze status OK
show indexes from t;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
t 0 PRIMARY 1 a A 100 NULL NULL LSMTREE
t 0 PRIMARY 2 b A 500 NULL NULL LSMTREE
t 0 PRIMARY 3 c A 2500 NULL NULL LSMTREE
t 0 PRIMARY 1 a A 10 NULL NULL LSMTREE
t 0 PRIMARY 2 b A 50 NULL NULL LSMTREE
t 0 PRIMARY 3 c A 250 NULL NULL LSMTREE
t 0 PRIMARY 4 d A 2500 NULL NULL LSMTREE
t 1 b 1 b A 50 NULL NULL LSMTREE
t 1 b 2 d A 500 NULL NULL LSMTREE
t 1 b 1 b A 5 NULL NULL LSMTREE
t 1 b 2 d A 50 NULL NULL LSMTREE
set optimizer_switch = 'skip_scan=off';
explain select b, d from t where d < 2;
id select_type table type possible_keys key key_len ref rows Extra
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--rocksdb_table_stats_sampling_pct=100
5 changes: 5 additions & 0 deletions storage/rocksdb/ha_rocksdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12639,6 +12639,11 @@ static void adjust_cardinality(
if ((uint64_t)stat.m_rows > max_num_rows_scanned) {
stat.adjust_cardinality(stat.m_rows / max_num_rows_scanned);
}
#ifndef DBUG_OFF
for (size_t i = 0; i < stat.m_distinct_keys_per_prefix.size(); i++) {
DBUG_ASSERT(stat.m_distinct_keys_per_prefix[i] <= stat.m_rows);
}
#endif
}
}

Expand Down
20 changes: 17 additions & 3 deletions storage/rocksdb/properties_collector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ void Rdb_tbl_prop_coll::CollectStatsForRow(const rocksdb::Slice &key,
stats->m_actual_disk_size += file_size - m_file_size;
m_file_size = file_size;

if (m_keydef != nullptr) {
if (m_keydef != nullptr && type == rocksdb::kEntryPut) {
m_cardinality_collector.ProcessKey(key, m_keydef.get(), stats);
}
}
Expand Down Expand Up @@ -239,6 +239,20 @@ rocksdb::Status Rdb_tbl_prop_coll::Finish(
for (Rdb_index_stats &stat : m_stats) {
m_cardinality_collector.SetCardinality(&stat);
m_cardinality_collector.AdjustStats(&stat);

// Adjust cardinality down if it exceeds total number of rows.
if (stat.m_distinct_keys_per_prefix.size()) {
int64_t max_distinct_keys = stat.m_distinct_keys_per_prefix.back();
if (max_distinct_keys > stat.m_rows) {
stat.adjust_cardinality(static_cast<double>(stat.m_rows) /
max_distinct_keys);
}
}
#ifndef DBUG_OFF
for (size_t i = 0; i < stat.m_distinct_keys_per_prefix.size(); i++) {
DBUG_ASSERT(stat.m_distinct_keys_per_prefix[i] <= stat.m_rows);
}
#endif
}

m_recorded = true;
Expand Down Expand Up @@ -496,9 +510,9 @@ void Rdb_index_stats::merge(const Rdb_index_stats &s, const bool increment,
}
}

void Rdb_index_stats::adjust_cardinality(uint64_t adjustment_factor) {
void Rdb_index_stats::adjust_cardinality(double adjustment_factor) {
for (int64_t &num_keys : m_distinct_keys_per_prefix) {
num_keys = num_keys * adjustment_factor;
num_keys = MY_MAX(1, num_keys * adjustment_factor);
}
}

Expand Down
2 changes: 1 addition & 1 deletion storage/rocksdb/properties_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct Rdb_index_stats {
void merge(const Rdb_index_stats &s, const bool increment = true,
const int64_t estimated_data_len = 0);

void adjust_cardinality(uint64_t adjustment_factor);
void adjust_cardinality(double adjustment_factor);

void reset_cardinality();
};
Expand Down

0 comments on commit 44d6a8d

Please sign in to comment.