Issue #108: Index-only scans do not work for partitioned tables and extended keys

- Introduce handler::init_with_fields() - a type of init function
  in the storage engine that is called *after* TABLE_SHARE's field
  and key objects are populated (the resulting contract is sketched
  below).

- Add an implementation in ha_rocksdb which sets
  HA_PRIMARY_KEY_IN_READ_INDEX when the PK's datatypes allow it.

- Add an implementation in ha_partition which invokes init_with_fields
  for the underlying partitions.

- Adjust table.cc:open_binary_frm() so that it calls h->init_with_fields()
  and does the relevant part of "Extended keys" processing after that.
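
For illustration, a minimal sketch of the contract the new hook establishes.
The engine name ha_example and the body are assumptions made for this sketch;
only the call order (fields and keys first, then init_with_fields(), then the
flag re-read) comes from this change:

/* Hypothetical engine override: a sketch, not part of this commit. */
class ha_example : public handler
{
public:
  bool init_with_fields() override
  {
    /* Unlike init(), this runs after open_binary_frm() has populated
       table_share->field and table_share->key_info, so key definitions
       can be inspected here. */
    if (table_share->primary_key < MAX_KEY)
    {
      /* ... examine table_share->key_info[table_share->primary_key]
         and decide whether HA_PRIMARY_KEY_IN_READ_INDEX can be set ... */
    }
    cached_table_flags= table_flags(); /* re-derive flags after the decision */
    return false;                      /* false = success, true = error */
  }
};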

Test Plan: ./mtr --parallel=16 --repeat=16 rocksdb.rocksdb_parts ...

Reviewers: spetrunia, hermanlee4, jkedgar

Reviewed By: jkedgar

Subscribers: yoshinorim, webscalesql-eng

Differential Revision: https://reviews.facebook.net/D51195
Differential Revision: https://reviews.facebook.net/D51951
spetrunia authored and Herman Lee committed Jan 24, 2017
1 parent 9bcbe95 commit 070a257
Showing 9 changed files with 104 additions and 17 deletions.
20 changes: 19 additions & 1 deletion mysql-test/suite/rocksdb/r/rocksdb_parts.result
@@ -47,7 +47,7 @@ insert into t1 values (11,20,20);
insert into t1 values (12,20,20);
explain select * from t1 force index(col1) where col1=10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 5 const 2 NULL
1 SIMPLE t1 ref col1 col1 5 const 2000 NULL
select * from t1 force index(col1) where col1=10;
pk col1 col2
2 10 10
@@ -57,3 +57,21 @@ pk col1 col2
2 10 10
1 10 10
drop table t1;
#
# Issue #108: Index-only scans do not work for partitioned tables and extended keys
#
create table t1 (
pk int primary key,
col1 int,
col2 int,
key (col1)
) engine=rocksdb partition by hash(pk) partitions 2;
insert into t1 values (1,10,10);
insert into t1 values (2,10,10);
insert into t1 values (11,20,20);
insert into t1 values (12,20,20);
# The following must use "Using index"
explain select pk from t1 force index(col1) where col1=10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref col1 col1 5 const 2000 Using index
drop table t1;
1 change: 1 addition & 0 deletions mysql-test/suite/rocksdb/t/rocksdb_parts-master.opt
@@ -0,0 +1 @@
--rocksdb_debug_optimizer_n_rows=1000
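
This option explains the rows column change in the .result diff above: the
debug override pins each partition's row estimate at 1000, and the partition
handler's total matches a simple sum (assuming, as the output above suggests,
estimates are added across the 2 partitions):

rows = partitions * rocksdb_debug_optimizer_n_rows = 2 * 1000 = 2000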
20 changes: 20 additions & 0 deletions mysql-test/suite/rocksdb/t/rocksdb_parts.test
@@ -60,3 +60,23 @@ select * from t1 force index(col1) where col1=10;
select * from t1 use index () where col1=10;
drop table t1;

--echo #
--echo # Issue #108: Index-only scans do not work for partitioned tables and extended keys
--echo #
create table t1 (
pk int primary key,
col1 int,
col2 int,
key (col1)
) engine=rocksdb partition by hash(pk) partitions 2;

insert into t1 values (1,10,10);
insert into t1 values (2,10,10);

insert into t1 values (11,20,20);
insert into t1 values (12,20,20);
--echo # The following must use "Using index"
explain select pk from t1 force index(col1) where col1=10;

drop table t1;

16 changes: 16 additions & 0 deletions sql/ha_partition.cc
@@ -440,6 +440,22 @@ ha_partition::~ha_partition()
}


bool ha_partition::init_with_fields()
{
  /* Pass the call to each partition */
  for (uint i= 0; i < m_tot_parts; i++)
  {
    if (m_file[i]->init_with_fields())
      return true;
  }
  /* Re-read table flags in case init_with_fields caused it to change */
  cached_table_flags= (m_file[0]->ha_table_flags() &
                       ~(PARTITION_DISABLED_TABLE_FLAGS)) |
                      PARTITION_ENABLED_TABLE_FLAGS;
  return false;
}


/*
Initialize partition handler object
2 changes: 2 additions & 0 deletions sql/ha_partition.h
@@ -291,6 +291,8 @@ class ha_partition :public handler
               ha_partition *clone_arg,
               MEM_ROOT *clone_mem_root_arg);
  ~ha_partition();

  bool init_with_fields();
  /*
    A partition handler has no characteristics in itself. It only inherits
    those from the underlying handlers. Here we set-up those constants to
2 changes: 2 additions & 0 deletions sql/handler.h
@@ -2064,6 +2064,8 @@ class handler :public Sql_alloc
  {
    cached_table_flags= table_flags();
  }

  virtual bool init_with_fields() { return false; }
  /* ha_ methods: public wrappers for private virtual API */

  int ha_open(TABLE *table, const char *name, int mode, int test_if_locked);
48 changes: 33 additions & 15 deletions sql/table.cc
@@ -2142,22 +2142,9 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
        field->flags|= PRI_KEY_FLAG;

        /*
          LevelDB & RocksDB SE: we need to re-read to get correct value of
          HA_PRIMARY_KEY_IN_READ_INDEX
          "if (ha_option & HA_PRIMARY_KEY_IN_READ_INDEX)" ... was moved below
          for MyRocks
        */
        ha_option= handler_file->ha_table_flags();
        /*
          If this field is part of the primary key and all keys contains
          the primary key, then we can use any key to find this column
        */
        if (ha_option & HA_PRIMARY_KEY_IN_READ_INDEX)
        {
          if (field->key_length() == key_part->length &&
              !(field->flags & BLOB_FLAG))
            field->part_of_key= share->keys_in_use;
          if (field->part_of_sortkey.is_set(key))
            field->part_of_sortkey= share->keys_in_use;
        }
      }
      if (field->key_length() != key_part->length)
      {
@@ -2216,6 +2203,37 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
        (ha_option & HA_ANY_INDEX_MAY_BE_UNIQUE))
      set_if_bigger(share->max_unique_length,keyinfo->key_length);
    }

    /*
      The next call is here for MyRocks: Now, we have filled in field and key
      definitions, give the storage engine a chance to adjust its properties.
      MyRocks may (and typically does) adjust HA_PRIMARY_KEY_IN_READ_INDEX
      flag in this call.
    */
    if (handler_file->init_with_fields())
      goto err;

    if (primary_key < MAX_KEY && (handler_file->ha_table_flags() &
                                  HA_PRIMARY_KEY_IN_READ_INDEX))
    {
      keyinfo= &share->key_info[primary_key];
      key_part= keyinfo->key_part;
      for (i=0 ; i < keyinfo->user_defined_key_parts ; key_part++,i++)
      {
        Field *field= key_part->field;
        /*
          If this field is part of the primary key and all keys contains
          the primary key, then we can use any key to find this column
        */
        if (field->key_length() == key_part->length &&
            !(field->flags & BLOB_FLAG))
          field->part_of_key= share->keys_in_use;
        if (field->part_of_sortkey.is_set(primary_key))
          field->part_of_sortkey= share->keys_in_use;
      }
    }

    if (primary_key < MAX_KEY &&
        (share->keys_in_use.is_set(primary_key)))
    {
10 changes: 9 additions & 1 deletion storage/rocksdb/ha_rocksdb.cc
@@ -2510,6 +2510,14 @@ const char **ha_rocksdb::bas_ext() const
  return ha_rocksdb_exts;
}

bool ha_rocksdb::init_with_fields()
{
  uint pk= table_share->primary_key;
  uint key_parts= table_share->key_info[pk].user_defined_key_parts;
  check_keyread_allowed(pk /*PK*/, key_parts-1, true);
  cached_table_flags= table_flags();
  return false; /* Ok */
}

/*
Convert record from table->record[0] form into a form that can be written
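
For context: check_keyread_allowed(pk, key_parts-1, true) sets the
pk_can_be_decoded member as a side effect (see the next hunk), and the
cached_table_flags= table_flags() re-read above then picks it up. A sketch of
how table_flags() presumably folds that member into the bitmap (the flag list
here is abbreviated and assumed; it is not shown in this diff):

/* Assumed, abbreviated shape of ha_rocksdb::table_flags(); the point is
   only the dependency on pk_can_be_decoded. */
ulonglong table_flags() const
{
  return HA_REC_NOT_IN_SEQ | HA_NULL_IN_KEY | /* other flags elided */
         (pk_can_be_decoded ? HA_PRIMARY_KEY_IN_READ_INDEX : 0);
}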
@@ -3506,7 +3514,7 @@ bool ha_rocksdb::check_keyread_allowed(uint inx, uint part, bool all_parts) const

  uint pk= table_share->primary_key;
  if (inx == pk && all_parts &&
      part == table_share->key_info[pk].user_defined_key_parts)
      part + 1 == table_share->key_info[pk].user_defined_key_parts)
  {
    pk_can_be_decoded= res;
  }
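
The one-line change above fixes an off-by-one: init_with_fields() passes
part = key_parts - 1, the index of the last key part, so the old test
part == user_defined_key_parts never matched that call and pk_can_be_decoded
was never updated for the extended-key case. A standalone illustration of the
arithmetic (plain C++, independent of the server sources):

#include <cassert>

int main()
{
  const unsigned key_parts= 3;        /* a PK with 3 user-defined parts */
  const unsigned part= key_parts - 1; /* index passed by init_with_fields() */
  assert(part + 1 == key_parts);      /* new condition: true for the last part */
  assert(!(part == key_parts));       /* old condition: never true here */
  return 0;
}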
2 changes: 2 additions & 0 deletions storage/rocksdb/ha_rocksdb.h
@@ -463,6 +463,8 @@ class ha_rocksdb: public handler
           HA_NULL_IN_KEY;
  }

  bool init_with_fields() override;

  /** @brief
    This is a bitmap of flags that indicates how the storage engine
    implements indexes. The current index flags are documented in
