From a411018396ba2f16e6848e0a90923be7f40c94ec Mon Sep 17 00:00:00 2001 From: Sergei Petrunia Date: Wed, 24 Sep 2014 07:06:31 +0400 Subject: [PATCH] Issue #17: Automatic per-index column families Add support for INDEX idx(col1, ...) COMMENT '$per_index_cf' --- mysql-test/r/rocksdb.result | 28 ++++++++ mysql-test/t/rocksdb.test | 32 +++++++++ storage/rocksdb/ha_rocksdb.cc | 54 +++++++++++++-- storage/rocksdb/ha_rocksdb.h | 19 ++++++ storage/rocksdb/rdb_cf_manager.cc | 108 ++++++++++++++++++++++++------ storage/rocksdb/rdb_cf_manager.h | 19 +++++- storage/rocksdb/rdb_datadic.cc | 3 +- storage/rocksdb/rdb_datadic.h | 10 ++- 8 files changed, 238 insertions(+), 35 deletions(-) diff --git a/mysql-test/r/rocksdb.result b/mysql-test/r/rocksdb.result index 15ff223500e7..cf48341f2fe2 100644 --- a/mysql-test/r/rocksdb.result +++ b/mysql-test/r/rocksdb.result @@ -1410,3 +1410,31 @@ PRIMARY KEY (id) INSERT INTO t1 SET id=123, blob_col=repeat('z',64000) ON DUPLICATE KEY UPDATE blob_col=VALUES(blob_col); INSERT INTO t1 SET id=123, blob_col='' ON DUPLICATE KEY UPDATE blob_col=VALUES(blob_col); DROP TABLE t1; +# +# Issue #17: Automatic per-index column families +# +create table t1 ( +id int not null, +key1 int, +PRIMARY KEY (id), +index (key1) comment '$per_index_cf' +) engine=rocksdb; +# Unfortunately there is no way to check which column family everything goes to +insert into t1 values (1,1); +select * from t1; +id key1 +1 1 +# Check that ALTER and RENAME are disallowed +alter table t1 add col2 int; +ERROR 42000: This version of MySQL doesn't yet support 'ALTER TABLE on table with per-index CF' +rename table t1 to t2; +ERROR 42000: This version of MySQL doesn't yet support 'ALTER TABLE on table with per-index CF' +drop table t1; +# Check detection of typos in $per_index_cf +create table t1 ( +id int not null, +key1 int, +PRIMARY KEY (id), +index (key1) comment '$per_idnex_cf' +)engine=rocksdb; +ERROR 42000: This version of MySQL doesn't yet support 'column family name looks like a typo of $per_index_cf' diff --git a/mysql-test/t/rocksdb.test b/mysql-test/t/rocksdb.test index c897a2f7b9c5..ff48631a25a2 100644 --- a/mysql-test/t/rocksdb.test +++ b/mysql-test/t/rocksdb.test @@ -1251,3 +1251,35 @@ INSERT INTO t1 SET id=123, blob_col='' ON DUPLICATE KEY UPDATE bl DROP TABLE t1; +--echo # +--echo # Issue #17: Automatic per-index column families +--echo # +create table t1 ( + id int not null, + key1 int, + PRIMARY KEY (id), + index (key1) comment '$per_index_cf' +) engine=rocksdb; + +--echo # Unfortunately there is no way to check which column family everything goes to +insert into t1 values (1,1); +select * from t1; + +--echo # Check that ALTER and RENAME are disallowed +--error ER_NOT_SUPPORTED_YET +alter table t1 add col2 int; + +--error ER_NOT_SUPPORTED_YET +rename table t1 to t2; + +drop table t1; + +--echo # Check detection of typos in \$per_index_cf +--error ER_NOT_SUPPORTED_YET +create table t1 ( + id int not null, + key1 int, + PRIMARY KEY (id), + index (key1) comment '$per_idnex_cf' +)engine=rocksdb; + diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 64de0380b506..ab7db6c0f1cb 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -1207,13 +1207,16 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked) uint key_len= table->key_info[table->s->primary_key].key_length; - const char *comment= table->key_info[table->s->primary_key].comment.str; + const char *comment= table->key_info[table->s->primary_key].comment.str; + const char *idx_name= table->key_info[table->s->primary_key].name; rocksdb::ColumnFamilyHandle *cf_handle; - if (!(cf_handle= cf_manager.get_cf(comment))) + bool is_auto_cf; + if (!(cf_handle= cf_manager.get_cf(comment, fullname.c_ptr(), idx_name, + &is_auto_cf))) DBUG_RETURN(HA_ERR_INTERNAL_ERROR); pk_descr->setup(table); // move this into get_share() ?? - pk_descr->set_cf_handle(cf_handle); + pk_descr->set_cf_handle(cf_handle, is_auto_cf); uint packed_key_len= pk_descr->max_storage_fmt_length(); @@ -1229,11 +1232,13 @@ int ha_rocksdb::open(const char *name, int mode, uint test_if_locked) continue; comment= table->key_info[i].comment.str; - if (!(cf_handle= cf_manager.get_cf(comment))) + idx_name= table->key_info[i].name; + if (!(cf_handle= cf_manager.get_cf(comment, fullname.c_ptr(), idx_name, + &is_auto_cf))) DBUG_RETURN(HA_ERR_INTERNAL_ERROR); key_descr[i]->setup(table); // move this into get_share() ?? - key_descr[i]->set_cf_handle(cf_handle); + key_descr[i]->set_cf_handle(cf_handle, is_auto_cf); uint packed_len= key_descr[i]->max_storage_fmt_length(); if (packed_len > max_packed_sec_key_len) @@ -1352,12 +1357,21 @@ int ha_rocksdb::create_key_defs(TABLE *table_arg, const char *db_table, uint len NULL, and it means use the default column family. */ const char *comment= table_arg->key_info[i].comment.str; - cf_handle= cf_manager.get_or_create_cf(rdb, comment); + if (looks_like_per_index_cf_typo(comment)) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "column family name looks like a typo of $per_index_cf"); + goto error; + } + bool is_auto_cf; + cf_handle= cf_manager.get_or_create_cf(rdb, comment, db_table, + table_arg->key_info[i].name, + &is_auto_cf); if (!cf_handle) goto error; if (!(key_descr[i]= new RDBSE_KEYDEF(ddl_manager.get_next_number(), i, - cf_handle))) + cf_handle, is_auto_cf))) goto error; } pk_descr= key_descr[table_arg->s->primary_key]; @@ -2802,6 +2816,32 @@ int ha_rocksdb::rename_table(const char *from, const char *to) if (rocksdb_normalize_tablename(to, &to_str)) return HA_ERR_INTERNAL_ERROR; + /* Check if any index has a per-index column family */ + { + /* + We can't check this directly, because + 1. the object that referred to the old table has been close()d. + 2. this ha_rocksdb object has no connection to any table at all, it has + been just created with (TABLE*)NULL. + + So, we have to find the old table in the ddl_manager (it's there because + it has been opened by this statement), and check is_auto_cf there. + */ + RDBSE_TABLE_DEF *tdef; + if (!(tdef= ddl_manager.find((uchar*)from_str.c_ptr(), from_str.length()))) + return HA_ERR_INTERNAL_ERROR; + + for (uint i= 0; i < tdef->n_keys; i++) + { + if (tdef->key_descr[i]->is_auto_cf) + { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "ALTER TABLE on table with per-index CF"); + return HA_ERR_INTERNAL_ERROR; + } + } + } + if (ddl_manager.rename((uchar*)from_str.ptr(), from_str.length(), (uchar*)to_str.ptr(), to_str.length(), rdb)) return HA_ERR_INTERNAL_ERROR; diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index f59ad1612d39..c3c3070e1b4e 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -33,8 +33,27 @@ #include "rdb_applyiter.h" + +/* + This is + - the name of the default Column Family (the CF which stores indexes which didn't + explicity specify which CF they are in) + - the name used to set the default column family parameter for per-cf + arguments. +*/ const char * const DEFAULT_CF_NAME= "default"; +/* + Column family name which means "put this index into its own column family". + See get_per_index_cf_name. +*/ +const char * const PER_INDEX_CF_NAME = "$per_index_cf"; + +inline bool looks_like_per_index_cf_typo(const char *name) +{ + return (name && name[0]=='$' && strcmp(name, PER_INDEX_CF_NAME)); +} + //#ifdef HAVE_PSI_INTERFACE extern PSI_stage_info stage_waiting_on_row_lock; diff --git a/storage/rocksdb/rdb_cf_manager.cc b/storage/rocksdb/rdb_cf_manager.cc index fbcd8d3245c8..f64fc2e8ec4a 100644 --- a/storage/rocksdb/rdb_cf_manager.cc +++ b/storage/rocksdb/rdb_cf_manager.cc @@ -49,35 +49,72 @@ void Column_family_manager::cleanup() } +/* + Generate Column Family name for per-index column families + + @param res OUT Column Family name +*/ + +void get_per_index_cf_name(const char *db_table_name, const char *index_name, + std::string *res) +{ + res->assign(db_table_name); + res->append("."); + res->append(index_name); +} + + +/* + @brief + Find column family by name. If it doesn't exist, create it + + @detail + See Column_family_manager::get_cf +*/ rocksdb::ColumnFamilyHandle* -Column_family_manager::get_or_create_cf(rocksdb::DB *rdb, const char *name) +Column_family_manager::get_or_create_cf(rocksdb::DB *rdb, const char *cf_name, + const char *db_table_name, + const char *index_name, + bool *is_automatic) { rocksdb::ColumnFamilyHandle* cf_handle; ColumnFamilyHandleMap::iterator it; mysql_mutex_lock(&cfm_mutex); - if (name == NULL) + *is_automatic= false; + if (cf_name == NULL) { cf_handle= default_cf; } - else if ((it= cf_map.find(name)) != cf_map.end()) - cf_handle= it->second; else { - /* Create a Column Family. */ - std::string cf_name(name); - rocksdb::ColumnFamilyOptions opts; - get_cf_options(cf_name, &opts); - - sql_print_information("RocksDB: creating column family %s", cf_name.c_str()); - sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size); - sql_print_information(" target_file_size_base=%d", opts.target_file_size_base); - - rocksdb::Status s= rdb->CreateColumnFamily(opts, name, &cf_handle); - if (s.ok()) - cf_map[cf_name]= cf_handle; + std::string per_index_name; + if (!strcmp(cf_name, PER_INDEX_CF_NAME)) + { + get_per_index_cf_name(db_table_name, index_name, &per_index_name); + cf_name= per_index_name.c_str(); + *is_automatic= true; + } + + if ((it= cf_map.find(cf_name)) != cf_map.end()) + cf_handle= it->second; else - cf_handle= NULL; + { + /* Create a Column Family. */ + std::string cf_name_str(cf_name); + rocksdb::ColumnFamilyOptions opts; + get_cf_options(cf_name_str, &opts); + + sql_print_information("RocksDB: creating column family %s", cf_name_str.c_str()); + sql_print_information(" write_buffer_size=%ld", opts.write_buffer_size); + sql_print_information(" target_file_size_base=%d", opts.target_file_size_base); + + rocksdb::Status s= rdb->CreateColumnFamily(opts, cf_name_str, &cf_handle); + if (s.ok()) + cf_map[cf_name_str]= cf_handle; + else + cf_handle= NULL; + } } mysql_mutex_unlock(&cfm_mutex); @@ -85,19 +122,46 @@ Column_family_manager::get_or_create_cf(rocksdb::DB *rdb, const char *name) } +/* + Find column family by its cf_name. + + @detail + dbname.tablename and index_name are also parameters, because + cf_name=PER_INDEX_CF_NAME means that column family name is a function + of table/index name. + + @param out is_automatic TRUE<=> column family name is auto-assigned based on + db_table_name and index_name. +*/ + rocksdb::ColumnFamilyHandle* -Column_family_manager::get_cf(const char *name) +Column_family_manager::get_cf(const char *cf_name, + const char *db_table_name, + const char *index_name, + bool *is_automatic) { rocksdb::ColumnFamilyHandle* cf_handle; ColumnFamilyHandleMap::iterator it; + *is_automatic= false; mysql_mutex_lock(&cfm_mutex); - if (name == NULL) + if (cf_name == NULL) cf_handle= default_cf; - else if ((it= cf_map.find(name)) != cf_map.end()) - cf_handle= it->second; else - cf_handle= NULL; + { + std::string per_index_name; + if (!strcmp(cf_name, PER_INDEX_CF_NAME)) + { + get_per_index_cf_name(db_table_name, index_name, &per_index_name); + cf_name= per_index_name.c_str(); + *is_automatic= true; + } + + if ((it= cf_map.find(cf_name)) != cf_map.end()) + cf_handle= it->second; + else + cf_handle= NULL; + } mysql_mutex_unlock(&cfm_mutex); return cf_handle; diff --git a/storage/rocksdb/rdb_cf_manager.h b/storage/rocksdb/rdb_cf_manager.h index e00598258d13..e1375e7223b4 100644 --- a/storage/rocksdb/rdb_cf_manager.h +++ b/storage/rocksdb/rdb_cf_manager.h @@ -22,6 +22,9 @@ */ void get_cf_options(const std::string &cf_name, rocksdb::ColumnFamilyOptions *opts); +void get_per_index_cf_name(const char *db_table_name, const char *index_name, + std::string *res); + /* We need a column family manager. Its functions: - create column families (synchronized, don't create the same twice) @@ -54,12 +57,22 @@ class Column_family_manager std::vector *handles); void cleanup(); - /* Used by CREATE TABLE. name=NULL means use default column family */ + /* + Used by CREATE TABLE. + - cf_name=NULL means use default column family + - cf_name=_auto_ means use 'dbname.tablename.indexname' + */ rocksdb::ColumnFamilyHandle* get_or_create_cf(rocksdb::DB *rdb, - const char *name); + const char *cf_name, + const char *db_table_name, + const char *index_name, + bool *is_automatic); /* Used by table open */ - rocksdb::ColumnFamilyHandle* get_cf(const char *name); + rocksdb::ColumnFamilyHandle* get_cf(const char *cf_name, + const char *db_table_name, + const char *index_name, + bool *is_automatic); // void drop_cf(); -- not implemented so far. }; diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc index 1bcfb44b1970..1e8c6f1da89c 100644 --- a/storage/rocksdb/rdb_datadic.cc +++ b/storage/rocksdb/rdb_datadic.cc @@ -949,7 +949,8 @@ bool Table_ddl_manager::init(rocksdb::DB *rdb_dict) initialization requires that there is an open TABLE* where we could look at Field* objects and set max_length and other attributes */ - tdef->key_descr[keyno]= new RDBSE_KEYDEF(index_number, keyno, NULL); + tdef->key_descr[keyno]= new RDBSE_KEYDEF(index_number, keyno, NULL, + false); /* Keep track of what was the last index number we saw */ if (max_number < index_number) diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 00a19c02395f..66ddf366c8d7 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -189,9 +189,10 @@ class RDBSE_KEYDEF } RDBSE_KEYDEF(uint indexnr_arg, uint keyno_arg, - rocksdb::ColumnFamilyHandle* cf_handle_arg) : + rocksdb::ColumnFamilyHandle* cf_handle_arg, bool is_auto_cf_arg) : index_number(indexnr_arg), cf_handle(cf_handle_arg), + is_auto_cf(is_auto_cf_arg), pk_part_no(NULL), pack_info(NULL), keyno(keyno_arg), @@ -208,9 +209,11 @@ class RDBSE_KEYDEF }; void setup(TABLE *table); - void set_cf_handle(rocksdb::ColumnFamilyHandle* cf_handle_arg) + void set_cf_handle(rocksdb::ColumnFamilyHandle* cf_handle_arg, + bool is_auto_cf_arg) { cf_handle= cf_handle_arg; + is_auto_cf= is_auto_cf_arg; } rocksdb::ColumnFamilyHandle *get_cf() { return cf_handle; } @@ -223,6 +226,9 @@ class RDBSE_KEYDEF uchar index_number_storage_form[INDEX_NUMBER_SIZE]; rocksdb::ColumnFamilyHandle* cf_handle; +public: + bool is_auto_cf; +private: friend class RDBSE_TABLE_DEF; // for index_number above