diff --git a/.gitignore b/.gitignore
index 1b5cd3946..9cda32e6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ libcassandra.so*
 *.a
 
 # cmake output
+src/third_party/sparsehash/src/sparsehash/internal/sparseconfig.h
 *.cmake
 !Find*.cmake
 *.build
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e38d8071b..0f8ab52c4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,7 +64,6 @@
 option(CASS_USE_BOOST_ATOMIC "Use Boost atomics library" OFF)
 option(CASS_USE_STD_ATOMIC "Use C++11 atomics library" OFF)
 option(CASS_USE_OPENSSL "Use OpenSSL" ON)
 option(CASS_USE_TCMALLOC "Use tcmalloc" OFF)
-option(CASS_USE_SPARSEHASH "Use sparsehash" OFF)
 option(CASS_USE_ZLIB "Use zlib" OFF)
 option(CASS_USE_LIBSSH2 "Use libssh2 for integration tests" ON)
@@ -108,12 +107,6 @@ if(CASS_USE_BOOST_ATOMIC OR CASS_BUILD_INTEGRATION_TESTS OR CASS_BUILD_UNIT_TEST
   CassUseBoost()
 endif()
 
-# Sparsehash
-if(CASS_USE_SPARSEHASH)
-  CassUseSparseHash()
-endif()
-
-
 # OpenSSL
 if(CASS_USE_OPENSSL)
   CassUseOpenSSL()
diff --git a/cmake/modules/CppDriver.cmake b/cmake/modules/CppDriver.cmake
index 8538aef7e..ef56260a1 100644
--- a/cmake/modules/CppDriver.cmake
+++ b/cmake/modules/CppDriver.cmake
@@ -158,40 +158,6 @@ macro(CassUseBoost)
   endif()
 endmacro()
 
-#------------------------
-# CassUseSparseHash
-#
-# Add includes required for using SparseHash.
-#
-# Input: CASS_INCLUDES
-# Output: CASS_INCLUDES
-#------------------------
-macro(CassUseSparseHash)
-  # Setup the paths and hints for sparsehash
-  set(_SPARSEHASH_ROOT_PATHS "${PROJECT_SOURCE_DIR}/lib/sparsehash/")
-  set(_SPARSEHASH_ROOT_HINTS ${SPARSEHASH_ROOT_DIR} $ENV{SPARSEHASH_ROOT_DIR})
-  if(NOT WIN32)
-    set(_SPARSEHASH_ROOT_PATHS ${_SPARSEHASH_ROOT_PATHS} "/usr/" "/usr/local/")
-  endif()
-  set(_SPARSEHASH_ROOT_HINTS_AND_PATHS
-    HINTS ${_SPARSEHASH_ROOT_HINTS}
-    PATHS ${_SPARSEHASH_ROOT_PATHS})
-
-  # Ensure sparsehash headers were found
-  find_path(SPARSEHASH_INCLUDE_DIR
-    NAMES google/dense_hash_map
-    HINTS ${_SPARSEHASH_INCLUDE_DIR} ${_SPARSEHASH_ROOT_HINTS_AND_PATHS}
-    PATH_SUFFIXES include)
-  find_package_handle_standard_args(SparseHash "Could NOT find sparsehash, try to set the path to the sparsehash root folder in the system variable SPARSEHASH_ROOT_DIR"
-    SPARSEHASH_INCLUDE_DIR)
-
-  set(CASS_INCLUDES ${CASS_INCLUDES} ${SPARSEHASH_INCLUDE_DIR})
-
-  if (SPARSEHASH_INCLUDE_DIR)
-    add_definitions("-DCASS_USE_SPARSEHASH")
-  endif()
-endmacro()
-
 #------------------------
 # CassUseOpenSSL
 #
@@ -458,8 +424,11 @@ macro(CassAddIncludes)
     ${CASS_SOURCE_DIR}/src
     ${CASS_SOURCE_DIR}/src/ssl
     ${CASS_SOURCE_DIR}/src/third_party/rapidjson
+    ${CASS_SOURCE_DIR}/src/third_party/rapidjson
+    ${CASS_SOURCE_DIR}/src/third_party/sparsehash/src
     ${CASS_INCLUDES}
   )
+  add_subdirectory(src/third_party/sparsehash)
 endmacro()
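Note: the build now vendors sparsehash under src/third_party/sparsehash and puts its src/ directory on the include path; the generated sparseconfig.h is ignored above because it is produced at configure time. A minimal sketch (not part of this change) of what the new include path enables, assuming the vendored copy exposes the sparsehash namespace used by the typedefs later in this diff (upstream google-sparsehash exposes the google namespace instead):

    // Sketch: using the vendored dense_hash_map via the new include path.
    #include <sparsehash/dense_hash_map>

    #include <cstdio>
    #include <string>

    int main() {
      sparsehash::dense_hash_map<std::string, int> counts;
      counts.set_empty_key("");        // mandatory before any insert
      counts.set_deleted_key("\x7f");  // mandatory before any erase
      counts["host1"] = 1;
      counts.erase("host1");
      std::printf("size=%u\n", static_cast<unsigned>(counts.size()));
      return 0;
    }

The reserved empty/deleted keys are part of the dense_hash_* contract, which is why reserved Address sentinels appear later in this diff.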
diff --git a/include/cassandra.h b/include/cassandra.h
index 9f7007ef3..0fd4654da 100644
--- a/include/cassandra.h
+++ b/include/cassandra.h
@@ -1329,9 +1329,9 @@ cass_cluster_set_load_balance_dc_aware_n(CassCluster* cluster,
 /**
  * Configures the cluster to use token-aware request routing or not.
  *
- * Important: Token-aware routing depends on keyspace information.
- * For this reason enabling token-aware routing will also enable the usage
- * of schema metadata.
+ * Important: Token-aware routing depends on keyspace metadata.
+ * For this reason enabling token-aware routing will also enable retrieving
+ * and updating keyspace schema metadata.
  *
  * Default: cass_true (enabled).
  *
@@ -1343,8 +1343,6 @@ cass_cluster_set_load_balance_dc_aware_n(CassCluster* cluster,
  *
  * @param[in] cluster
  * @param[in] enabled
- *
- * @see cass_cluster_set_use_schema();
  */
 CASS_EXPORT void
 cass_cluster_set_token_aware_routing(CassCluster* cluster,
@@ -1665,9 +1663,8 @@ cass_cluster_set_retry_policy(CassCluster* cluster,
 /**
  * Enable/Disable retrieving and updating schema metadata. If disabled
  * this allows the driver to skip over retrieving and updating schema
- * metadata, but it also disables the usage of token-aware routing and
- * cass_session_get_schema_meta() will always return an empty object. This can
- * be useful for reducing the startup overhead of short-lived sessions.
+ * metadata and cass_session_get_schema_meta() will always return an empty object.
+ * This can be useful for reducing the startup overhead of short-lived sessions.
  *
  * Default: cass_true (enabled).
  *
@@ -1677,7 +1674,6 @@
  * @param[in] cluster
  * @param[in] enabled
  *
  * @see cass_session_get_schema_meta()
- * @see cass_cluster_set_token_aware_routing()
  */
 CASS_EXPORT void
 cass_cluster_set_use_schema(CassCluster* cluster,
diff --git a/src/address.cpp b/src/address.cpp
index 4675ca78e..7553d8292 100644
--- a/src/address.cpp
+++ b/src/address.cpp
@@ -21,6 +21,9 @@
 namespace cass {
 
+const Address Address::EMPTY_KEY("0.0.0.0", 0);
+const Address Address::DELETED_KEY("0.0.0.0", 1);
+
 Address::Address() {
   init();
 }
@@ -153,6 +156,9 @@ int Address::compare(const Address& a) const {
   if (family() != a.family()) {
     return family() < a.family() ? -1 : 1;
   }
+  if (port() != a.port()) {
+    return port() < a.port() ? -1 : 1;
+  }
   if (family() == AF_INET) {
     if (addr_in()->sin_addr.s_addr != a.addr_in()->sin_addr.s_addr) {
       return addr_in()->sin_addr.s_addr < a.addr_in()->sin_addr.s_addr ? -1 : 1;
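Note: the new port check in Address::compare() is what distinguishes the two reserved keys above; both sentinels use the address 0.0.0.0 and differ only by port. A self-contained sketch of that invariant (SimpleAddress is a hypothetical stand-in for cass::Address, not driver code):

    #include <cassert>
    #include <stdint.h>

    struct SimpleAddress {
      uint32_t ip;   // 0.0.0.0 for both sentinels
      int port;      // 0 = empty sentinel, 1 = deleted sentinel

      int compare(const SimpleAddress& a) const {
        if (port != a.port) return port < a.port ? -1 : 1;
        if (ip != a.ip) return ip < a.ip ? -1 : 1;
        return 0;
      }
    };

    int main() {
      SimpleAddress empty_key = { 0, 0 };
      SimpleAddress deleted_key = { 0, 1 };
      // Without the port comparison the two sentinels would compare equal,
      // which the dense_hash_set contract forbids.
      assert(empty_key.compare(deleted_key) != 0);
      return 0;
    }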
diff --git a/src/address.hpp b/src/address.hpp
index b50f4c2a3..5c4dd107d 100644
--- a/src/address.hpp
+++ b/src/address.hpp
@@ -17,8 +17,11 @@
 #ifndef __CASS_ADDRESS_HPP_INCLUDED__
 #define __CASS_ADDRESS_HPP_INCLUDED__
 
+#include "hash.hpp"
 #include "utils.hpp"
 
+#include <sparsehash/dense_hash_set>
+
 #include
 #include
 #include
@@ -29,6 +32,9 @@
 namespace cass {
 
 class Address {
  public:
+  static const Address EMPTY_KEY;
+  static const Address DELETED_KEY;
+
   Address();
   Address(const std::string& ip, int port);
@@ -77,8 +83,21 @@ class Address {
   struct sockaddr_storage addr_;
 };
 
+struct AddressHash {
+  std::size_t operator()(const cass::Address& a) const {
+    if (a.family() == AF_INET) {
+      return cass::hash::fnv1a(reinterpret_cast<const char*>(a.addr_in()),
+                               sizeof(struct sockaddr_in));
+    } else if (a.family() == AF_INET6) {
+      return cass::hash::fnv1a(reinterpret_cast<const char*>(a.addr_in6()),
+                               sizeof(struct sockaddr_in6));
+    }
+    return 0;
+  }
+};
+
 typedef std::vector<Address> AddressVec;
-typedef std::set<Address> AddressSet;
+typedef sparsehash::dense_hash_set<Address, AddressHash> AddressSet;
 
 inline bool operator<(const Address& a, const Address& b) {
   return a.compare(b) < 0;
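Note: AddressHash feeds the raw socket-address bytes to FNV-1a. A runnable sketch of the same idea, with the caveat that byte-wise hashing of a struct is only stable if unused and padding bytes are zeroed first (the driver's Address::init() is assumed to do this):

    #include <netinet/in.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint64_t fnv1a(const char* data, size_t length) {
      uint64_t h = 0xcbf29ce484222325ULL;      // FNV-1a 64-bit offset basis
      for (size_t i = 0; i < length; ++i) {
        h ^= static_cast<uint8_t>(data[i]);
        h *= 0x100000001b3ULL;                 // FNV-1a 64-bit prime
      }
      return h;
    }

    int main() {
      struct sockaddr_in addr;
      memset(&addr, 0, sizeof(addr)); // zero padding bytes for a stable hash
      addr.sin_family = AF_INET;
      addr.sin_port = htons(9042);
      addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
      printf("%016llx\n", (unsigned long long)fnv1a(
          reinterpret_cast<const char*>(&addr), sizeof(addr)));
      return 0;
    }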
diff --git a/src/cluster.cpp b/src/cluster.cpp
index 677a6806d..1cd1ca5d1 100644
--- a/src/cluster.cpp
+++ b/src/cluster.cpp
@@ -259,10 +259,6 @@ CassError cass_cluster_set_load_balance_dc_aware_n(CassCluster* cluster,
 void cass_cluster_set_token_aware_routing(CassCluster* cluster,
                                           cass_bool_t enabled) {
   cluster->config().set_token_aware_routing(enabled == cass_true);
-  // Token-aware routing relies on up-to-date schema information
-  if (enabled == cass_true) {
-    cluster->config().set_use_schema(true);
-  }
 }
 
 void cass_cluster_set_latency_aware_routing(CassCluster* cluster,
@@ -407,10 +403,6 @@ void cass_cluster_set_timestamp_gen(CassCluster* cluster,
 void cass_cluster_set_use_schema(CassCluster* cluster,
                                  cass_bool_t enabled) {
   cluster->config().set_use_schema(enabled == cass_true);
-  // Token-aware routing relies on up-to-date schema information
-  if (enabled == cass_false) {
-    cluster->config().set_token_aware_routing(false);
-  }
 }
 
 CassError cass_cluster_set_use_hostname_resolution(CassCluster* cluster,
diff --git a/src/connection.hpp b/src/connection.hpp
index dd46cae50..8c98d07bc 100644
--- a/src/connection.hpp
+++ b/src/connection.hpp
@@ -20,6 +20,7 @@
 #include "buffer.hpp"
 #include "cassandra.h"
 #include "handler.hpp"
+#include "hash.hpp"
 #include "host.hpp"
 #include "list.hpp"
 #include "macros.hpp"
diff --git a/src/constants.hpp b/src/constants.hpp
index 60f41c1ec..eff58c96c 100644
--- a/src/constants.hpp
+++ b/src/constants.hpp
@@ -17,6 +17,8 @@
 #ifndef __CASS_CONSTANTS_HPP_INCLUDED__
 #define __CASS_CONSTANTS_HPP_INCLUDED__
 
+#define CASS_UINT32_MAX 4294967295UL
+
 #define CASS_INT64_MAX 9223372036854775807LL
 #define CASS_INT64_MIN (-CASS_INT64_MAX - 1)
diff --git a/src/control_connection.cpp b/src/control_connection.cpp
index cb61e2895..1665cce06 100644
--- a/src/control_connection.cpp
+++ b/src/control_connection.cpp
@@ -112,7 +112,8 @@ ControlConnection::ControlConnection()
   , session_(NULL)
   , connection_(NULL)
   , protocol_version_(0)
-  , should_query_tokens_(false) {}
+  , use_schema_(false)
+  , token_aware_routing_(false) { }
 
 const SharedRefPtr<Host>& ControlConnection::connected_host() const {
   return current_host_;
 }
@@ -126,19 +127,21 @@ void ControlConnection::clear() {
   query_plan_.reset();
   protocol_version_ = 0;
   last_connection_error_.clear();
-  should_query_tokens_ = false;
+  use_schema_ = false;
+  token_aware_routing_ = false;
 }
 
 void ControlConnection::connect(Session* session) {
   session_ = session;
   query_plan_.reset(new ControlStartupQueryPlan(session_->hosts_)); // No hosts lock necessary (read-only)
   protocol_version_ = session_->config().protocol_version();
-  should_query_tokens_ = session_->config().token_aware_routing();
+  use_schema_ = session_->config().use_schema();
+  token_aware_routing_ = session_->config().token_aware_routing();
   if (protocol_version_ < 0) {
     protocol_version_ = CASS_HIGHEST_SUPPORTED_PROTOCOL_VERSION;
   }
 
-  if (session_->config().use_schema()) {
+  if (use_schema_ || token_aware_routing_) {
     set_event_types(CASS_EVENT_TOPOLOGY_CHANGE | CASS_EVENT_STATUS_CHANGE |
                     CASS_EVENT_SCHEMA_CHANGE);
   } else {
@@ -199,9 +202,6 @@ void ControlConnection::on_ready(Connection* connection) {
   LOG_DEBUG("Connection ready on host %s",
             connection->address().to_string().c_str());
 
-  // A protocol version is needed to encode/decode maps properly
-  session_->metadata().set_protocol_version(protocol_version_);
-
   // The control connection has to
refresh meta when there's a reconnect because // events could have been missed while not connected. query_meta_hosts(); @@ -284,7 +284,9 @@ void ControlConnection::on_event(EventResponse* response) { SharedRefPtr host = session_->get_host(response->affected_node()); if (host) { session_->on_remove(host); - session_->metadata().remove_host(host); + if (session_->token_map_) { + session_->token_map_->remove_host_and_build(host); + } } else { LOG_DEBUG("Tried to remove host %s that doesn't exist", address_str.c_str()); } @@ -298,7 +300,9 @@ void ControlConnection::on_event(EventResponse* response) { refresh_node_info(host, false, true); } else { LOG_DEBUG("Move event for host %s that doesn't exist", address_str.c_str()); - session_->metadata().remove_host(host); + if (session_->token_map_) { + session_->token_map_->remove_host_and_build(host); + } } break; } @@ -324,10 +328,17 @@ void ControlConnection::on_event(EventResponse* response) { } case CASS_EVENT_SCHEMA_CHANGE: + // Only handle keyspace events when using token-aware routing + if (!use_schema_ && + response->schema_change_target() != EventResponse::KEYSPACE) { + return; + } + LOG_DEBUG("Schema change (%d): %.*s %.*s\n", response->schema_change(), (int)response->keyspace().size(), response->keyspace().data(), (int)response->target().size(), response->target().data()); + switch (response->schema_change()) { case EventResponse::CREATED: case EventResponse::UPDATED: @@ -389,8 +400,11 @@ void ControlConnection::on_event(EventResponse* response) { void ControlConnection::query_meta_hosts() { ScopedRefPtr > handler( new ControlMultipleRequestHandler(this, ControlConnection::on_query_hosts, UnusedData())); - handler->execute_query("local", SELECT_LOCAL_TOKENS); - handler->execute_query("peers", SELECT_PEERS_TOKENS); + // This needs to happen before other schema metadata queries so that we have + // a valid Cassandra version because this version determines which follow up + // schema metadata queries are executed. + handler->execute_query("local", token_aware_routing_ ? SELECT_LOCAL_TOKENS : SELECT_LOCAL); + handler->execute_query("peers", token_aware_routing_ ? 
SELECT_PEERS_TOKENS : SELECT_PEERS); } void ControlConnection::on_query_hosts(ControlConnection* control_connection, @@ -403,6 +417,11 @@ void ControlConnection::on_query_hosts(ControlConnection* control_connection, Session* session = control_connection->session_; + if (session->token_map_) { + // Clearing token/hosts will not invalidate the replicas + session->token_map_->clear_tokens_and_hosts(); + } + bool is_initial_connection = (control_connection->state_ == CONTROL_STATE_NEW); // If the 'system.local' table is empty the connection isn't used as a control @@ -418,9 +437,8 @@ void ControlConnection::on_query_hosts(ControlConnection* control_connection, ResultResponse* local_result; if (MultipleRequestHandler::get_result_response(responses, "local", &local_result) && local_result->row_count() > 0) { - local_result->decode_first_row(); - control_connection->update_node_info(host, &local_result->first_row()); - session->metadata().set_cassandra_version(host->cassandra_version()); + control_connection->update_node_info(host, &local_result->first_row(), ADD_HOST); + control_connection->cassandra_version_ = host->cassandra_version(); } else { LOG_WARN("No row found in %s's local system table", connection->address_string().c_str()); @@ -438,7 +456,6 @@ void ControlConnection::on_query_hosts(ControlConnection* control_connection, { ResultResponse* peers_result; if (MultipleRequestHandler::get_result_response(responses, "peers", &peers_result)) { - peers_result->decode_first_row(); ResultIterator rows(peers_result); while (rows.next()) { Address address; @@ -459,7 +476,7 @@ void ControlConnection::on_query_hosts(ControlConnection* control_connection, host->set_mark(session->current_host_mark_); - control_connection->update_node_info(host, rows.row()); + control_connection->update_node_info(host, rows.row(), ADD_HOST); if (is_new && !is_initial_connection) { session->on_add(host, false); } @@ -469,7 +486,8 @@ void ControlConnection::on_query_hosts(ControlConnection* control_connection, session->purge_hosts(is_initial_connection); - if (session->config().use_schema()) { + if (control_connection->use_schema_ || + control_connection->token_aware_routing_) { control_connection->query_meta_schema(); } else if (is_initial_connection) { control_connection->state_ = CONTROL_STATE_READY; @@ -486,25 +504,33 @@ void ControlConnection::query_meta_schema() { ScopedRefPtr > handler( new ControlMultipleRequestHandler(this, ControlConnection::on_query_meta_schema, UnusedData())); - if (session_->metadata().cassandra_version() >= VersionNumber(3, 0, 0)) { - handler->execute_query("keyspaces", SELECT_KEYSPACES_30); - handler->execute_query("tables", SELECT_TABLES_30); - handler->execute_query("views", SELECT_VIEWS_30); - handler->execute_query("columns", SELECT_COLUMNS_30); - handler->execute_query("indexes", SELECT_INDEXES_30); - handler->execute_query("user_types", SELECT_USERTYPES_30); - handler->execute_query("functions", SELECT_FUNCTIONS_30); - handler->execute_query("aggregates", SELECT_AGGREGATES_30); + if (cassandra_version_ >= VersionNumber(3, 0, 0)) { + if (use_schema_ || token_aware_routing_) { + handler->execute_query("keyspaces", SELECT_KEYSPACES_30); + } + if (use_schema_) { + handler->execute_query("tables", SELECT_TABLES_30); + handler->execute_query("views", SELECT_VIEWS_30); + handler->execute_query("columns", SELECT_COLUMNS_30); + handler->execute_query("indexes", SELECT_INDEXES_30); + handler->execute_query("user_types", SELECT_USERTYPES_30); + handler->execute_query("functions", 
SELECT_FUNCTIONS_30); + handler->execute_query("aggregates", SELECT_AGGREGATES_30); + } } else { - handler->execute_query("keyspaces", SELECT_KEYSPACES_20); - handler->execute_query("tables", SELECT_COLUMN_FAMILIES_20); - handler->execute_query("columns", SELECT_COLUMNS_20); - if (session_->metadata().cassandra_version() >= VersionNumber(2, 1, 0)) { - handler->execute_query("user_types", SELECT_USERTYPES_21); + if (use_schema_ || token_aware_routing_) { + handler->execute_query("keyspaces", SELECT_KEYSPACES_20); } - if (session_->metadata().cassandra_version() >= VersionNumber(2, 2, 0)) { - handler->execute_query("functions", SELECT_FUNCTIONS_22); - handler->execute_query("aggregates", SELECT_AGGREGATES_22); + if (use_schema_) { + handler->execute_query("tables", SELECT_COLUMN_FAMILIES_20); + handler->execute_query("columns", SELECT_COLUMNS_20); + if (cassandra_version_ >= VersionNumber(2, 1, 0)) { + handler->execute_query("user_types", SELECT_USERTYPES_21); + } + if (cassandra_version_ >= VersionNumber(2, 2, 0)) { + handler->execute_query("functions", SELECT_FUNCTIONS_22); + handler->execute_query("aggregates", SELECT_AGGREGATES_22); + } } } } @@ -518,53 +544,65 @@ void ControlConnection::on_query_meta_schema(ControlConnection* control_connecti } Session* session = control_connection->session_; - - session->metadata().clear_and_update_back(); + int protocol_version = control_connection->protocol_version_; + const VersionNumber& cassandra_version = control_connection->cassandra_version_; bool is_initial_connection = (control_connection->state_ == CONTROL_STATE_NEW); - ResultResponse* keyspaces_result; - if (MultipleRequestHandler::get_result_response(responses, "keyspaces", &keyspaces_result)) { - session->metadata().update_keyspaces(keyspaces_result); + if (session->token_map_) { + ResultResponse* keyspaces_result; + if (MultipleRequestHandler::get_result_response(responses, "keyspaces", &keyspaces_result)) { + session->token_map_->clear_replicas_and_strategies(); // Only clear replicas once we have the new keyspaces + session->token_map_->add_keyspaces(cassandra_version, keyspaces_result); + } + session->token_map_->build(); } - ResultResponse* tables_result; - if (MultipleRequestHandler::get_result_response(responses, "tables", &tables_result)) { - session->metadata().update_tables(tables_result); - } + if (control_connection->use_schema_) { + session->metadata().clear_and_update_back(cassandra_version); - ResultResponse* views_result; - if (MultipleRequestHandler::get_result_response(responses, "views", &views_result)) { - session->metadata().update_views(views_result); - } + ResultResponse* keyspaces_result; + if (MultipleRequestHandler::get_result_response(responses, "keyspaces", &keyspaces_result)) { + session->metadata().update_keyspaces(protocol_version, cassandra_version, keyspaces_result); + } - ResultResponse* columns_result = NULL; - if (MultipleRequestHandler::get_result_response(responses, "columns", &columns_result)) { - session->metadata().update_columns(columns_result); - } + ResultResponse* tables_result; + if (MultipleRequestHandler::get_result_response(responses, "tables", &tables_result)) { + session->metadata().update_tables(protocol_version, cassandra_version, tables_result); + } - ResultResponse* indexes_result; - if (MultipleRequestHandler::get_result_response(responses, "indexes", &indexes_result)) { - session->metadata().update_indexes(indexes_result); - } + ResultResponse* views_result; + if (MultipleRequestHandler::get_result_response(responses, "views", 
&views_result)) { + session->metadata().update_views(protocol_version, cassandra_version, views_result); + } - ResultResponse* user_types_result; - if (MultipleRequestHandler::get_result_response(responses, "user_types", &user_types_result)) { - session->metadata().update_user_types(user_types_result); - } + ResultResponse* columns_result = NULL; + if (MultipleRequestHandler::get_result_response(responses, "columns", &columns_result)) { + session->metadata().update_columns(protocol_version, cassandra_version, columns_result); + } - ResultResponse* functions_result; - if (MultipleRequestHandler::get_result_response(responses, "functions", &functions_result)) { - session->metadata().update_functions(functions_result); - } + ResultResponse* indexes_result; + if (MultipleRequestHandler::get_result_response(responses, "indexes", &indexes_result)) { + session->metadata().update_indexes(protocol_version, cassandra_version, indexes_result); + } - ResultResponse* aggregates_result; - if (MultipleRequestHandler::get_result_response(responses, "aggregates", &aggregates_result)) { - session->metadata().update_aggregates(aggregates_result); - } + ResultResponse* user_types_result; + if (MultipleRequestHandler::get_result_response(responses, "user_types", &user_types_result)) { + session->metadata().update_user_types(protocol_version, cassandra_version, user_types_result); + } - session->metadata().swap_to_back_and_update_front(); - if (control_connection->should_query_tokens_) session->metadata().build(); + ResultResponse* functions_result; + if (MultipleRequestHandler::get_result_response(responses, "functions", &functions_result)) { + session->metadata().update_functions(protocol_version, cassandra_version, functions_result); + } + + ResultResponse* aggregates_result; + if (MultipleRequestHandler::get_result_response(responses, "aggregates", &aggregates_result)) { + session->metadata().update_aggregates(protocol_version, cassandra_version, aggregates_result); + } + + session->metadata().swap_to_back_and_update_front(); + } if (is_initial_connection) { control_connection->state_ = CONTROL_STATE_READY; @@ -587,7 +625,7 @@ void ControlConnection::refresh_node_info(SharedRefPtr host, std::string query; ControlHandler::ResponseCallback response_callback; - bool token_query = should_query_tokens_ && (host->was_just_added() || query_tokens); + bool token_query = token_aware_routing_ && (host->was_just_added() || query_tokens); if (is_connected_host || !host->listen_address().empty()) { if (is_connected_host) { query.assign(token_query ? 
SELECT_LOCAL_TOKENS : SELECT_LOCAL); @@ -636,8 +674,7 @@ void ControlConnection::on_refresh_node_info(ControlConnection* control_connecti host_address_str.c_str()); return; } - result->decode_first_row(); - control_connection->update_node_info(data.host, &result->first_row()); + control_connection->update_node_info(data.host, &result->first_row(), UPDATE_HOST_AND_BUILD); if (data.is_new_node) { control_connection->session_->on_add(data.host, false); @@ -665,7 +702,6 @@ void ControlConnection::on_refresh_node_info_all(ControlConnection* control_conn return; } - result->decode_first_row(); ResultIterator rows(result); while (rows.next()) { const Row* row = rows.row(); @@ -676,7 +712,7 @@ void ControlConnection::on_refresh_node_info_all(ControlConnection* control_conn row->get_by_name("rpc_address"), &address); if (is_valid_address && data.host->address().compare(address) == 0) { - control_connection->update_node_info(data.host, row); + control_connection->update_node_info(data.host, row, UPDATE_HOST_AND_BUILD); if (data.is_new_node) { control_connection->session_->on_add(data.host, false); } @@ -685,7 +721,7 @@ void ControlConnection::on_refresh_node_info_all(ControlConnection* control_conn } } -void ControlConnection::update_node_info(SharedRefPtr host, const Row* row) { +void ControlConnection::update_node_info(SharedRefPtr host, const Row* row, UpdateHostType type) { const Value* v; std::string rack; @@ -727,21 +763,22 @@ void ControlConnection::update_node_info(SharedRefPtr host, const Row* row host->address().to_string().c_str()); } - if (should_query_tokens_) { + if (token_aware_routing_) { bool is_connected_host = connection_ != NULL && host->address().compare(connection_->address()) == 0; std::string partitioner; if (is_connected_host && row->get_string_by_name("partitioner", &partitioner)) { - session_->metadata().set_partitioner(partitioner); + if (!session_->token_map_) { + session_->token_map_.reset(TokenMap::from_partitioner(partitioner)); + } } v = row->get_by_name("tokens"); - if (v != NULL) { - CollectionIterator i(v); - TokenStringList tokens; - while (i.next()) { - tokens.push_back(i.value()->to_string_ref()); - } - if (!tokens.empty()) { - session_->metadata().update_host(host, tokens); + if (v != NULL && v->is_collection()) { + if (session_->token_map_) { + if (type == UPDATE_HOST_AND_BUILD) { + session_->token_map_->update_host_and_build(host, v); + } else { + session_->token_map_->add_host(host, v); + } } } } @@ -750,7 +787,7 @@ void ControlConnection::update_node_info(SharedRefPtr host, const Row* row void ControlConnection::refresh_keyspace(const StringRef& keyspace_name) { std::string query; - if (session_->metadata().cassandra_version() >= VersionNumber(3, 0, 0)) { + if (cassandra_version_ >= VersionNumber(3, 0, 0)) { query.assign(SELECT_KEYSPACES_30); } else { query.assign(SELECT_KEYSPACES_20); @@ -777,7 +814,18 @@ void ControlConnection::on_refresh_keyspace(ControlConnection* control_connectio keyspace_name.c_str()); return; } - control_connection->session_->metadata().update_keyspaces(result); + + Session* session = control_connection->session_; + int protocol_version = control_connection->protocol_version_; + const VersionNumber& cassandra_version = control_connection->cassandra_version_; + + if (session->token_map_) { + session->token_map_->update_keyspaces_and_build(cassandra_version, result); + } + + if (control_connection->use_schema_) { + session->metadata().update_keyspaces(protocol_version, cassandra_version, result); + } } void 
ControlConnection::refresh_table_or_view(const StringRef& keyspace_name, @@ -787,7 +835,7 @@ void ControlConnection::refresh_table_or_view(const StringRef& keyspace_name, std::string column_query; std::string index_query; - if (session_->metadata().cassandra_version() >= VersionNumber(3, 0, 0)) { + if (cassandra_version_ >= VersionNumber(3, 0, 0)) { table_query.assign(SELECT_TABLES_30); table_query.append(" WHERE keyspace_name='").append(keyspace_name.data(), keyspace_name.size()) .append("' AND table_name='").append(table_or_view_name.data(), table_or_view_name.size()).append("'"); @@ -837,6 +885,8 @@ void ControlConnection::on_refresh_table_or_view(ControlConnection* control_conn const MultipleRequestHandler::ResponseMap& responses) { ResultResponse* tables_result; Session* session = control_connection->session_; + int protocol_version = control_connection->protocol_version_; + const VersionNumber& cassandra_version = control_connection->cassandra_version_; if (!MultipleRequestHandler::get_result_response(responses, "tables", &tables_result) || tables_result->row_count() == 0) { ResultResponse* views_result; @@ -846,19 +896,19 @@ void ControlConnection::on_refresh_table_or_view(ControlConnection* control_conn data.keyspace_name.c_str(), data.table_or_view_name.c_str()); return; } - session->metadata().update_views(views_result); + session->metadata().update_views(protocol_version, cassandra_version, views_result); } else { - session->metadata().update_tables(tables_result); + session->metadata().update_tables(protocol_version, cassandra_version, tables_result); } ResultResponse* columns_result; if (MultipleRequestHandler::get_result_response(responses, "columns", &columns_result)) { - session->metadata().update_columns(columns_result); + session->metadata().update_columns(protocol_version, cassandra_version, columns_result); } ResultResponse* indexes_result; if (MultipleRequestHandler::get_result_response(responses, "indexes", &indexes_result)) { - session->metadata().update_indexes(indexes_result); + session->metadata().update_indexes(protocol_version, cassandra_version, indexes_result); } } @@ -867,7 +917,7 @@ void ControlConnection::refresh_type(const StringRef& keyspace_name, const StringRef& type_name) { std::string query; - if (session_->metadata().cassandra_version() >= VersionNumber(3, 0, 0)) { + if (cassandra_version_ >= VersionNumber(3, 0, 0)) { query.assign(SELECT_USERTYPES_30); } else { query.assign(SELECT_USERTYPES_21); @@ -895,7 +945,10 @@ void ControlConnection::on_refresh_type(ControlConnection* control_connection, keyspace_and_type_names.second.c_str()); return; } - control_connection->session_->metadata().update_user_types(result); + Session* session = control_connection->session_; + int protocol_version = control_connection->protocol_version_; + const VersionNumber& cassandra_version = control_connection->cassandra_version_; + session->metadata().update_user_types(protocol_version, cassandra_version, result); } void ControlConnection::refresh_function(const StringRef& keyspace_name, @@ -904,7 +957,7 @@ void ControlConnection::refresh_function(const StringRef& keyspace_name, bool is_aggregate) { std::string query; - if (session_->metadata().cassandra_version() >= VersionNumber(3, 0, 0)) { + if (cassandra_version_ >= VersionNumber(3, 0, 0)) { if (is_aggregate) { query.assign(SELECT_AGGREGATES_30); query.append(" WHERE keyspace_name=? AND aggregate_name=? 
AND argument_types=?"); @@ -959,10 +1012,13 @@ void ControlConnection::on_refresh_function(ControlConnection* control_connectio Metadata::full_function_name(data.function, data.arg_types).c_str()); return; } + Session* session = control_connection->session_; + int protocol_version = control_connection->protocol_version_; + const VersionNumber& cassandra_version = control_connection->cassandra_version_; if (data.is_aggregate) { - control_connection->session_->metadata().update_aggregates(result); + session->metadata().update_aggregates(protocol_version, cassandra_version, result); } else { - control_connection->session_->metadata().update_functions(result); + session->metadata().update_functions(protocol_version, cassandra_version, result); } } @@ -1026,6 +1082,17 @@ void ControlConnection::on_reconnect(Timer* timer) { control_connection->reconnect(false); } +template +void ControlConnection::ControlMultipleRequestHandler::execute_query( + const std::string& index, const std::string& query) { + // We need to update the loop time to prevent new requests from timing out + // in cases where a callback took a long time to execute. In the future, + // we might improve this by executing the these long running callbacks + // on a seperate thread. + uv_update_time(control_connection_->session_->loop()); + MultipleRequestHandler::execute_query(index, query); +} + template void ControlConnection::ControlMultipleRequestHandler::on_set( const MultipleRequestHandler::ResponseMap& responses) { diff --git a/src/control_connection.hpp b/src/control_connection.hpp index 579b96161..011e345e9 100644 --- a/src/control_connection.hpp +++ b/src/control_connection.hpp @@ -19,7 +19,6 @@ #include "address.hpp" #include "connection.hpp" -#include "token_map.hpp" #include "handler.hpp" #include "host.hpp" #include "load_balancing.hpp" @@ -27,6 +26,9 @@ #include "multiple_request_handler.hpp" #include "response.hpp" #include "scoped_ptr.hpp" +#include "token_map.hpp" + +#include namespace cass { @@ -57,6 +59,10 @@ class ControlConnection : public Connection::Listener { return protocol_version_; } + const VersionNumber& cassandra_version() const { + return cassandra_version_; + } + const SharedRefPtr& connected_host() const; void clear(); @@ -81,6 +87,8 @@ class ControlConnection : public Connection::Listener { , response_callback_(response_callback) , data_(data) {} + void execute_query(const std::string& index, const std::string& query); + virtual void on_set(const MultipleRequestHandler::ResponseMap& responses); virtual void on_error(CassError code, const std::string& message) { @@ -171,6 +179,11 @@ class ControlConnection : public Connection::Listener { bool is_aggregate; }; + enum UpdateHostType { + ADD_HOST, + UPDATE_HOST_AND_BUILD + }; + void schedule_reconnect(uint64_t ms = 0); void reconnect(bool retry_current_host); @@ -206,7 +219,7 @@ class ControlConnection : public Connection::Listener { const RefreshNodeData& data, Response* response); - void update_node_info(SharedRefPtr host, const Row* row); + void update_node_info(SharedRefPtr host, const Row* row, UpdateHostType type); void refresh_keyspace(const StringRef& keyspace_name); static void on_refresh_keyspace(ControlConnection* control_connection, const std::string& keyspace_name, Response* response); @@ -239,8 +252,10 @@ class ControlConnection : public Connection::Listener { ScopedPtr query_plan_; Host::Ptr current_host_; int protocol_version_; + VersionNumber cassandra_version_; std::string last_connection_error_; - bool should_query_tokens_; + bool 
diff --git a/src/control_connection.hpp b/src/control_connection.hpp
index 579b96161..011e345e9 100644
--- a/src/control_connection.hpp
+++ b/src/control_connection.hpp
@@ -19,7 +19,6 @@
 #include "address.hpp"
 #include "connection.hpp"
-#include "token_map.hpp"
 #include "handler.hpp"
 #include "host.hpp"
 #include "load_balancing.hpp"
@@ -27,6 +26,9 @@
 #include "multiple_request_handler.hpp"
 #include "response.hpp"
 #include "scoped_ptr.hpp"
+#include "token_map.hpp"
+
+#include
 
 namespace cass {
 
@@ -57,6 +59,10 @@ class ControlConnection : public Connection::Listener {
     return protocol_version_;
   }
 
+  const VersionNumber& cassandra_version() const {
+    return cassandra_version_;
+  }
+
   const SharedRefPtr<Host>& connected_host() const;
 
   void clear();
@@ -81,6 +87,8 @@
       , response_callback_(response_callback)
       , data_(data) {}
 
+    void execute_query(const std::string& index, const std::string& query);
+
     virtual void on_set(const MultipleRequestHandler::ResponseMap& responses);
 
     virtual void on_error(CassError code, const std::string& message) {
@@ -171,6 +179,11 @@
     bool is_aggregate;
   };
 
+  enum UpdateHostType {
+    ADD_HOST,
+    UPDATE_HOST_AND_BUILD
+  };
+
   void schedule_reconnect(uint64_t ms = 0);
   void reconnect(bool retry_current_host);
@@ -206,7 +219,7 @@
                                        const RefreshNodeData& data,
                                        Response* response);
 
-  void update_node_info(SharedRefPtr<Host> host, const Row* row);
+  void update_node_info(SharedRefPtr<Host> host, const Row* row, UpdateHostType type);
 
   void refresh_keyspace(const StringRef& keyspace_name);
   static void on_refresh_keyspace(ControlConnection* control_connection,
                                   const std::string& keyspace_name,
                                   Response* response);
@@ -239,8 +252,10 @@
   ScopedPtr<QueryPlan> query_plan_;
   Host::Ptr current_host_;
   int protocol_version_;
+  VersionNumber cassandra_version_;
   std::string last_connection_error_;
-  bool should_query_tokens_;
+  bool use_schema_;
+  bool token_aware_routing_;
 
   static Address bind_any_ipv4_;
   static Address bind_any_ipv6_;
diff --git a/src/copy_on_write_ptr.hpp b/src/copy_on_write_ptr.hpp
index 062180688..533d67c03 100644
--- a/src/copy_on_write_ptr.hpp
+++ b/src/copy_on_write_ptr.hpp
@@ -45,6 +45,10 @@ class CopyOnWritePtr {
     return *this;
   }
 
+  operator bool() const {
+    return ptr_->ref != NULL;
+  }
+
   const T& operator*() const {
     return *(ptr_->ref);
   }
diff --git a/src/dc_aware_policy.cpp b/src/dc_aware_policy.cpp
index ea0d387d3..6dbe6e3f7 100644
--- a/src/dc_aware_policy.cpp
+++ b/src/dc_aware_policy.cpp
@@ -57,7 +57,7 @@ CassHostDistance DCAwarePolicy::distance(const SharedRefPtr<Host>& host) const {
 QueryPlan* DCAwarePolicy::new_query_plan(const std::string& connected_keyspace,
                                          const Request* request,
-                                         const TokenMap& token_map,
+                                         const TokenMap* token_map,
                                          Request::EncodingCache* cache) {
   CassConsistency cl = request != NULL ? request->consistency() : Request::DEFAULT_CONSISTENCY;
   return new DCAwareQueryPlan(this, cl, index_++);
diff --git a/src/dc_aware_policy.hpp b/src/dc_aware_policy.hpp
index d510c30fd..9eb6cc74f 100644
--- a/src/dc_aware_policy.hpp
+++ b/src/dc_aware_policy.hpp
@@ -52,7 +52,7 @@ class DCAwarePolicy : public LoadBalancingPolicy {
   virtual QueryPlan* new_query_plan(const std::string& connected_keyspace,
                                     const Request* request,
-                                    const TokenMap& token_map,
+                                    const TokenMap* token_map,
                                     Request::EncodingCache* cache);
 
   virtual void on_add(const SharedRefPtr<Host>& host);
diff --git a/src/future.cpp b/src/future.cpp
index aa044303b..1db89ac26 100644
--- a/src/future.cpp
+++ b/src/future.cpp
@@ -59,7 +59,6 @@ const CassResult* cass_future_get_result(CassFuture* future) {
   cass::SharedRefPtr<cass::ResultResponse> result(response_future->response());
   if (!result) return NULL;
 
-  result->decode_first_row();
   result->inc_ref();
   return CassResult::to(result.get());
 }
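Note: new_query_plan now takes a const TokenMap* that may be NULL when token-aware routing is disabled, and the CopyOnWritePtr conversion above is what lets call sites test a possibly-unset token map with plain "if (token_map_)". Because the driver targets pre-C++11 compilers it uses the implicit form; a sketch of the same check with the C++11 explicit form, which still works in conditions but blocks accidental integral conversions (TestPtr is a toy stand-in, not the driver's class):

    #include <cassert>

    template <class T>
    class TestPtr {
    public:
      explicit TestPtr(T* ptr) : ptr_(ptr) {}
      explicit operator bool() const { return ptr_ != nullptr; } // C++11
    private:
      T* ptr_;
    };

    int main() {
      int v = 7;
      TestPtr<int> set(&v);
      TestPtr<int> unset(nullptr);
      assert(set && !unset); // contextual conversion is still allowed
      // int x = set;        // would not compile with "explicit"
      return 0;
    }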
diff --git a/src/hash.hpp b/src/hash.hpp
new file mode 100644
index 000000000..63c1a3841
--- /dev/null
+++ b/src/hash.hpp
@@ -0,0 +1,58 @@
+/*
+  Copyright (c) 2014-2016 DataStax
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#ifndef __CASS_HASH_HPP_INCLUDED__
+#define __CASS_HASH_HPP_INCLUDED__
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace cass { namespace hash {
+
+typedef int (Op)(int);
+
+inline int nop(int c) { return c; }
+
+#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__)
+#define FNV1_64_INIT 0xcbf29ce484222325ULL
+#define FNV1_64_PRIME 0x100000001b3ULL
+
+inline uint64_t fnv1a(const char* data, size_t length, Op op = nop) {
+  uint64_t h = FNV1_64_INIT;
+  for (size_t i = 0; i < length; ++i) {
+    h ^= static_cast<uint64_t>(op(data[i]));
+    h *= FNV1_64_PRIME;
+  }
+  return h;
+}
+#else
+#define FNV1_32_INIT 0x811c9dc5
+#define FNV1_32_PRIME 0x01000193
+
+inline uint32_t fnv1a(const char* data, size_t length, Op op = nop) {
+  uint32_t h = FNV1_32_INIT;
+  for (size_t i = 0; i < length; ++i) {
+    h ^= static_cast<uint32_t>(op(data[i]));
+    h *= FNV1_32_PRIME;
+  }
+  return h;
+}
+#endif
+
+} } // namespace cass::hash
+
+#endif
diff --git a/src/hash_table.hpp b/src/hash_table.hpp
index e23550fad..f3f8a8f24 100644
--- a/src/hash_table.hpp
+++ b/src/hash_table.hpp
@@ -18,6 +18,7 @@
 #define __CASS_HASH_INDEX_HPP_INCLUDED__
 
 #include "fixed_vector.hpp"
+#include "hash.hpp"
 #include "macros.hpp"
 #include "string_ref.hpp"
 #include "utils.hpp"
@@ -30,36 +31,6 @@
 
 namespace cass {
 
-#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__)
-#define FNV1_64_INIT 0xcbf29ce484222325ULL
-#define FNV1_64_PRIME 0x100000001b3ULL
-
-inline uint64_t fnv1a_hash_lower(StringRef s) {
-  uint64_t h = FNV1_64_INIT;
-  for(StringRef::const_iterator i = s.begin(), end = s.end(); i != end; ++i) {
-    h ^= static_cast<uint64_t>(static_cast<uint8_t>(::tolower(*i)));
-    h *= FNV1_64_PRIME;
-  }
-  return h;
-}
-
-#undef FNV1_64_INIT
-#undef FNV1_64_PRIME
-#else
-#define FNV1_32_INIT 0x811c9dc5
-#define FNV1_32_PRIME 0x01000193
-inline uint32_t fnv1a_hash_lower(StringRef s) {
-  uint32_t h = FNV1_32_INIT;
-  for(StringRef::const_iterator i = s.begin(), end = s.end(); i != end; ++i) {
-    h ^= static_cast<uint32_t>(static_cast<uint8_t>(::tolower(*i)));
-    h *= FNV1_32_PRIME;
-  }
-  return h;
-}
-#undef FNV1_32_INIT
-#undef FNV1_32_PRIME
-#endif
-
 typedef FixedVector IndexVec;
 
 template <class T>
 struct HashTableEntry {
   T* next;
 };
 
-
 template <class T>
 class CaseInsensitiveHashTable {
  public:
@@ -129,7 +99,8 @@ size_t CaseInsensitiveHashTable<T>::get_indices(StringRef name, IndexVec* result
     name = name.substr(1, name.size() - 2);
   }
 
-  size_t h = fnv1a_hash_lower(name) & index_mask_;
+  size_t h = hash::fnv1a(name.data(),
+                         name.size(), ::tolower) & index_mask_;
   size_t start = h;
 
   while (index_[h] != NULL && !iequals(name, index_[h]->name)) {
@@ -188,7 +159,8 @@ void CaseInsensitiveHashTable<T>::set_entries(const EntryVec& entries) {
 
 template <class T>
 void CaseInsensitiveHashTable<T>::add_index(T* entry) {
-  size_t h = fnv1a_hash_lower(entry->name) & index_mask_;
+  size_t h = hash::fnv1a(entry->name.data(),
+                         entry->name.size(), ::tolower) & index_mask_;
 
   if (index_[h] == NULL) {
     index_[h] = entry;
diff --git a/src/host.hpp b/src/host.hpp
index 84108f026..928a641ed 100644
--- a/src/host.hpp
+++ b/src/host.hpp
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 
 namespace cass {
@@ -113,6 +114,8 @@ class Host : public RefCounted<Host> {
   Host(const Address& address, bool mark)
     : address_(address)
+    , rack_id_(0)
+    , dc_id_(0)
     , mark_(mark)
     , state_(ADDED)
     , address_string_(address.to_string()) { }
@@ -140,6 +143,13 @@
     dc_ = dc;
   }
 
+  uint32_t rack_id() const { return rack_id_; }
+  uint32_t dc_id() const { return dc_id_; }
+  void set_rack_and_dc_ids(uint32_t rack_id, uint32_t dc_id) {
+ rack_id_ = rack_id; + dc_id_ = dc_id; + } + const std::string& listen_address() const { return listen_address_; } void set_listen_address(const std::string& listen_address) { listen_address_ = listen_address; @@ -219,6 +229,8 @@ class Host : public RefCounted { } Address address_; + uint32_t rack_id_; + uint32_t dc_id_; bool mark_; Atomic state_; std::string address_string_; diff --git a/src/io_worker.cpp b/src/io_worker.cpp index df63833b1..0f16275cb 100644 --- a/src/io_worker.cpp +++ b/src/io_worker.cpp @@ -35,6 +35,10 @@ IOWorker::IOWorker(Session* session) , keyspace_(new std::string) , pending_request_count_(0) , request_queue_(config_.queue_size_io()) { + pools_.set_empty_key(Address::EMPTY_KEY); + pools_.set_deleted_key(Address::DELETED_KEY); + unavailable_addresses_.set_empty_key(Address::EMPTY_KEY); + unavailable_addresses_.set_deleted_key(Address::DELETED_KEY); prepare_.data = this; uv_mutex_init(&unavailable_addresses_mutex_); } diff --git a/src/io_worker.hpp b/src/io_worker.hpp index c785d7bf7..64fb5d909 100644 --- a/src/io_worker.hpp +++ b/src/io_worker.hpp @@ -29,7 +29,8 @@ #include "spsc_queue.hpp" #include "timer.hpp" -#include +#include + #include #include @@ -130,7 +131,7 @@ class IOWorker #endif private: - typedef std::map > PoolMap; + typedef sparsehash::dense_hash_map, AddressHash> PoolMap; typedef std::vector > PoolVec; void schedule_reconnect(const Host::ConstPtr& host); diff --git a/src/latency_aware_policy.cpp b/src/latency_aware_policy.cpp index c48406eda..005543c56 100644 --- a/src/latency_aware_policy.cpp +++ b/src/latency_aware_policy.cpp @@ -46,7 +46,7 @@ void LatencyAwarePolicy::close_handles() { QueryPlan* LatencyAwarePolicy::new_query_plan(const std::string& connected_keyspace, const Request* request, - const TokenMap& token_map, + const TokenMap* token_map, Request::EncodingCache* cache) { return new LatencyAwareQueryPlan(this, child_policy_->new_query_plan(connected_keyspace, request, diff --git a/src/latency_aware_policy.hpp b/src/latency_aware_policy.hpp index c71dfab9a..1f2ee4a6a 100644 --- a/src/latency_aware_policy.hpp +++ b/src/latency_aware_policy.hpp @@ -58,7 +58,7 @@ class LatencyAwarePolicy : public ChainedLoadBalancingPolicy { virtual QueryPlan* new_query_plan(const std::string& connected_keyspace, const Request* request, - const TokenMap& token_map, + const TokenMap* token_map, Request::EncodingCache* cache); virtual LoadBalancingPolicy* new_instance() { diff --git a/src/list_policy.cpp b/src/list_policy.cpp index 19e8f9aec..891f6aa83 100644 --- a/src/list_policy.cpp +++ b/src/list_policy.cpp @@ -42,7 +42,7 @@ CassHostDistance ListPolicy::distance(const SharedRefPtr& host) const { QueryPlan* ListPolicy::new_query_plan(const std::string& connected_keyspace, const Request* request, - const TokenMap& token_map, + const TokenMap* token_map, Request::EncodingCache* cache) { return child_policy_->new_query_plan(connected_keyspace, request, diff --git a/src/list_policy.hpp b/src/list_policy.hpp index 0d6ef4ce6..5dfbc6e9e 100644 --- a/src/list_policy.hpp +++ b/src/list_policy.hpp @@ -36,7 +36,7 @@ class ListPolicy : public ChainedLoadBalancingPolicy { virtual QueryPlan* new_query_plan(const std::string& connected_keyspace, const Request* request, - const TokenMap& token_map, + const TokenMap* token_map, Request::EncodingCache* cache); virtual void on_add(const SharedRefPtr& host); diff --git a/src/load_balancing.hpp b/src/load_balancing.hpp index 37e6410c2..8c1731204 100644 --- a/src/load_balancing.hpp +++ b/src/load_balancing.hpp @@ -89,7 +89,7 @@ 
class LoadBalancingPolicy : public Host::StateListener, public RefCounted> 8; + result[2] = a_ >> 16; + result[3] = a_ >> 24; + result[4] = b_; + result[5] = b_ >> 8; + result[6] = b_ >> 16; + result[7] = b_ >> 24; + result[8] = c_; + result[9] = c_ >> 8; + result[10] = c_ >> 16; + result[11] = c_ >> 24; + result[12] = d_; + result[13] = d_ >> 8; + result[14] = d_ >> 16; + result[15] = d_ >> 24; + + memset(this, 0, sizeof(Md5)); +} + +void Md5::final(uint64_t* hi, uint64_t* lo) { + final(); + + *hi = static_cast(a_) << 32 | (static_cast(b_) & 0xFFFFFFFF); + *lo = static_cast(c_) << 32 | (static_cast(d_) & 0xFFFFFFFF); + + memset(this, 0, sizeof(Md5)); +} + +void Md5::final() { unsigned long used, free; used = lo_ & 0x3f; @@ -130,25 +162,6 @@ void Md5::final(uint8_t* result) { buffer_[63] = hi_ >> 24; body(buffer_, 64); - - result[0] = a_; - result[1] = a_ >> 8; - result[2] = a_ >> 16; - result[3] = a_ >> 24; - result[4] = b_; - result[5] = b_ >> 8; - result[6] = b_ >> 16; - result[7] = b_ >> 24; - result[8] = c_; - result[9] = c_ >> 8; - result[10] = c_ >> 16; - result[11] = c_ >> 24; - result[12] = d_; - result[13] = d_ >> 8; - result[14] = d_ >> 16; - result[15] = d_ >> 24; - - memset(this, 0, sizeof(Md5)); } // This processes one or more 64-byte data blocks, but does NOT update diff --git a/src/md5.hpp b/src/md5.hpp index 6b180dfbe..bd526093b 100644 --- a/src/md5.hpp +++ b/src/md5.hpp @@ -23,6 +23,7 @@ #include "macros.hpp" #include +#include namespace cass { @@ -32,8 +33,10 @@ class Md5 { void update(const uint8_t* data, size_t size); void final(uint8_t* result); + void final(uint64_t* hi, uint64_t* lo); private: + void final(); const uint8_t* body(const uint8_t* data, size_t size); private: diff --git a/src/metadata.cpp b/src/metadata.cpp index eba16739a..9e223e439 100644 --- a/src/metadata.cpp +++ b/src/metadata.cpp @@ -66,7 +66,6 @@ cass_uint32_t cass_schema_meta_snapshot_version(const CassSchemaMeta* schema_met return schema_meta->version(); } - CassVersion cass_schema_meta_version(const CassSchemaMeta* schema_meta) { CassVersion version; version.major_version = schema_meta->cassandra_version().major_version(); @@ -786,111 +785,105 @@ std::string Metadata::full_function_name(const std::string& name, const StringVe return full_function_name; } -Metadata::SchemaSnapshot Metadata::schema_snapshot() const { +Metadata::SchemaSnapshot Metadata::schema_snapshot(int protocol_version, const VersionNumber& cassandra_version) const { ScopedMutex l(&mutex_); return SchemaSnapshot(schema_snapshot_version_, - config_.protocol_version, - config_.cassandra_version, + protocol_version, + cassandra_version, front_.keyspaces()); } -void Metadata::update_keyspaces(ResultResponse* result) { - KeyspaceMetadata::Map updates; - +void Metadata::update_keyspaces(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { schema_snapshot_version_++; if (is_front_buffer()) { ScopedMutex l(&mutex_); - updating_->update_keyspaces(config_, result, updates); + updating_->update_keyspaces(protocol_version, cassandra_version, result); } else { - updating_->update_keyspaces(config_, result, updates); - } - - for (KeyspaceMetadata::Map::const_iterator i = updates.begin(); i != updates.end(); ++i) { - token_map_.update_keyspace(i->first, i->second); + updating_->update_keyspaces(protocol_version, cassandra_version, result); } } -void Metadata::update_tables(ResultResponse* result) { +void Metadata::update_tables(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* 
result) { schema_snapshot_version_++; if (is_front_buffer()) { ScopedMutex l(&mutex_); - updating_->update_tables(config_, result); + updating_->update_tables(protocol_version, cassandra_version, result); } else { - updating_->update_tables(config_, result); + updating_->update_tables(protocol_version, cassandra_version, result); } } -void Metadata::update_views(ResultResponse* result) { +void Metadata::update_views(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { schema_snapshot_version_++; if (is_front_buffer()) { ScopedMutex l(&mutex_); - updating_->update_views(config_, result); + updating_->update_views(protocol_version, cassandra_version, result); } else { - updating_->update_views(config_, result); + updating_->update_views(protocol_version, cassandra_version, result); } } -void Metadata::update_columns(ResultResponse* result) { +void Metadata::update_columns(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { schema_snapshot_version_++; if (is_front_buffer()) { ScopedMutex l(&mutex_); - updating_->update_columns(config_, result); - if (cassandra_version() < VersionNumber(3, 0, 0)) { - updating_->update_legacy_indexes(config_, result); + updating_->update_columns(protocol_version, cassandra_version, native_types_, result); + if (cassandra_version < VersionNumber(3, 0, 0)) { + updating_->update_legacy_indexes(protocol_version, cassandra_version, result); } } else { - updating_->update_columns(config_, result); - if (cassandra_version() < VersionNumber(3, 0, 0)) { - updating_->update_legacy_indexes(config_, result); + updating_->update_columns(protocol_version, cassandra_version, native_types_, result); + if (cassandra_version < VersionNumber(3, 0, 0)) { + updating_->update_legacy_indexes(protocol_version, cassandra_version, result); } } } -void Metadata::update_indexes(ResultResponse* result) { +void Metadata::update_indexes(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { schema_snapshot_version_++; if (is_front_buffer()) { ScopedMutex l(&mutex_); - updating_->update_indexes(config_, result); + updating_->update_indexes(protocol_version, cassandra_version, result); } else { - updating_->update_indexes(config_, result); + updating_->update_indexes(protocol_version, cassandra_version, result); } } -void Metadata::update_user_types(ResultResponse* result) { +void Metadata::update_user_types(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { schema_snapshot_version_++; if (is_front_buffer()) { ScopedMutex l(&mutex_); - updating_->update_user_types(config_, result); + updating_->update_user_types(protocol_version, cassandra_version, native_types_, result); } else { - updating_->update_user_types(config_, result); + updating_->update_user_types(protocol_version, cassandra_version, native_types_, result); } } -void Metadata::update_functions(ResultResponse* result) { +void Metadata::update_functions(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { schema_snapshot_version_++; if (is_front_buffer()) { ScopedMutex l(&mutex_); - updating_->update_functions(config_, result); + updating_->update_functions(protocol_version, cassandra_version, native_types_, result); } else { - updating_->update_functions(config_, result); + updating_->update_functions(protocol_version, cassandra_version, native_types_, result); } } -void Metadata::update_aggregates(ResultResponse* result) { +void 
Metadata::update_aggregates(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { schema_snapshot_version_++; if (is_front_buffer()) { ScopedMutex l(&mutex_); - updating_->update_aggregates(config_, result); + updating_->update_aggregates(protocol_version, cassandra_version, native_types_, result); } else { - updating_->update_aggregates(config_, result); + updating_->update_aggregates(protocol_version, cassandra_version, native_types_, result); } } @@ -949,13 +942,12 @@ void Metadata::drop_aggregate(const std::string& keyspace_name, const std::strin } } -void Metadata::clear_and_update_back() { - if (config_.cassandra_version >= VersionNumber(3, 0, 0)) { - config_.native_types.init_cql_names(); +void Metadata::clear_and_update_back(const VersionNumber& cassandra_version) { + if (cassandra_version >= VersionNumber(3, 0, 0)) { + native_types_.init_cql_names(); } else { - config_.native_types.init_class_names(); + native_types_.init_class_names(); } - token_map_.clear(); back_.clear(); updating_ = &back_; } @@ -977,7 +969,6 @@ void Metadata::clear() { front_.clear(); } back_.clear(); - token_map_.clear(); } const Value* MetadataBase::get_field(const std::string& name) const { @@ -1174,10 +1165,10 @@ const UserType* KeyspaceMetadata::get_user_type(const std::string& name) const { return i->second.get(); } -void KeyspaceMetadata::update(const MetadataConfig& config, const SharedRefPtr& buffer, const Row* row) { +void KeyspaceMetadata::update(int protocol_version, const VersionNumber& cassandra_version, const SharedRefPtr& buffer, const Row* row) { add_field(buffer, row, "keyspace_name"); add_field(buffer, row, "durable_writes"); - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { + if (cassandra_version >= VersionNumber(3, 0, 0)) { const Value* map = add_field(buffer, row, "replication"); if (map != NULL && map->value_type() == CASS_VALUE_TYPE_MAP && @@ -1199,7 +1190,7 @@ void KeyspaceMetadata::update(const MetadataConfig& config, const SharedRefPtrvalue_type())) { strategy_class_ = value->to_string_ref(); } - const Value* map = add_json_map_field(config.protocol_version, row, "strategy_options"); + const Value* map = add_json_map_field(protocol_version, row, "strategy_options"); if (map != NULL) { strategy_options_ = *map; } @@ -1238,7 +1229,7 @@ void KeyspaceMetadata::drop_aggregate(const std::string& full_aggregate_name) { aggregates_->erase(full_aggregate_name); } -TableMetadataBase::TableMetadataBase(const MetadataConfig& config, +TableMetadataBase::TableMetadataBase(int protocol_version, const VersionNumber& cassandra_version, const std::string& name, const SharedRefPtr& buffer, const Row* row) : MetadataBase(name) { add_field(buffer, row, "keyspace_name"); @@ -1254,7 +1245,7 @@ TableMetadataBase::TableMetadataBase(const MetadataConfig& config, add_field(buffer, row, "memtable_flush_period_in_ms"); add_field(buffer, row, "read_repair_chance"); - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { + if (cassandra_version >= VersionNumber(3, 0, 0)) { add_field(buffer, row, "dclocal_read_repair_chance"); add_field(buffer, row, "crc_check_chance"); add_field(buffer, row, "compaction"); @@ -1265,15 +1256,15 @@ TableMetadataBase::TableMetadataBase(const MetadataConfig& config, add_field(buffer, row, "local_read_repair_chance"); add_field(buffer, row, "compaction_strategy_class"); - add_json_map_field(config.protocol_version, row, "compaction_strategy_options"); - add_json_map_field(config.protocol_version, row, "compression_parameters"); + 
add_json_map_field(protocol_version, row, "compaction_strategy_options"); + add_json_map_field(protocol_version, row, "compression_parameters"); - add_json_list_field(config.protocol_version, row, "column_aliases"); + add_json_list_field(protocol_version, row, "column_aliases"); add_field(buffer, row, "comparator"); add_field(buffer, row, "subcomparator"); add_field(buffer, row, "default_validator"); add_field(buffer, row, "key_alias"); - add_json_list_field(config.protocol_version, row, "key_aliases"); + add_json_list_field(protocol_version, row, "key_aliases"); add_field(buffer, row, "value_alias"); add_field(buffer, row, "key_validator"); add_field(buffer, row, "type"); @@ -1316,13 +1307,13 @@ size_t get_column_count(const ColumnMetadata::Vec& columns, CassColumnType type) return count; } -void TableMetadataBase::build_keys_and_sort(const MetadataConfig& config) { +void TableMetadataBase::build_keys_and_sort(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types) { // Also, Reorders columns so that the order is: // 1) Parition key // 2) Clustering keys // 3) Other columns - if (config.cassandra_version.major_version() >= 2) { + if (cassandra_version.major_version() >= 2) { partition_key_.resize(get_column_count(columns_, CASS_COLUMN_TYPE_PARTITION_KEY)); clustering_key_.resize(get_column_count(columns_, CASS_COLUMN_TYPE_CLUSTERING_KEY)); clustering_key_order_.resize(clustering_key_.size(), CASS_CLUSTERING_ORDER_NONE); @@ -1359,7 +1350,7 @@ void TableMetadataBase::build_keys_and_sort(const MetadataConfig& config) { } SharedRefPtr key_validator - = DataTypeClassNameParser::parse_with_composite(get_string_field("key_validator"), config.native_types); + = DataTypeClassNameParser::parse_with_composite(get_string_field("key_validator"), native_types); size_t size = key_validator->types().size(); partition_key_.reserve(size); for (size_t i = 0; i < size; ++i) { @@ -1393,7 +1384,7 @@ void TableMetadataBase::build_keys_and_sort(const MetadataConfig& config) { // TODO: Figure out how to test these special cases and properly document them here SharedRefPtr comparator - = DataTypeClassNameParser::parse_with_composite(get_string_field("comparator"), config.native_types); + = DataTypeClassNameParser::parse_with_composite(get_string_field("comparator"), native_types); size_t size = comparator->types().size(); if (comparator->is_composite()) { if (!comparator->collections().empty() || @@ -1439,11 +1430,11 @@ void TableMetadataBase::build_keys_and_sort(const MetadataConfig& config) { const TableMetadata::Ptr TableMetadata::NIL; -TableMetadata::TableMetadata(const MetadataConfig& config, +TableMetadata::TableMetadata(int protocol_version, const VersionNumber& cassandra_version, const std::string& name, const SharedRefPtr& buffer, const Row* row) - : TableMetadataBase(config, name, buffer, row) { - add_field(buffer, row, table_column_name(config.cassandra_version)); - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { + : TableMetadataBase(protocol_version, cassandra_version, name, buffer, row) { + add_field(buffer, row, table_column_name(cassandra_version)); + if (cassandra_version >= VersionNumber(3, 0, 0)) { add_field(buffer, row, "flags"); } } @@ -1495,10 +1486,10 @@ void TableMetadata::key_aliases(const NativeDataTypes& native_types, KeyAliases* const ViewMetadata::Ptr ViewMetadata::NIL; -ViewMetadata::ViewMetadata(const MetadataConfig& config, +ViewMetadata::ViewMetadata(int protocol_version, const VersionNumber& cassandra_version, TableMetadata* 
table, const std::string& name, const SharedRefPtr& buffer, const Row* row) - : TableMetadataBase(config, name, buffer, row) + : TableMetadataBase(protocol_version, cassandra_version, name, buffer, row) , base_table_(table) { add_field(buffer, row, "keyspace_name"); add_field(buffer, row, "view_name"); @@ -1525,7 +1516,7 @@ void TableMetadata::clear_indexes() { indexes_by_name_.clear(); } -FunctionMetadata::FunctionMetadata(const MetadataConfig& config, +FunctionMetadata::FunctionMetadata(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, const std::string& name, const Value* signature, KeyspaceMetadata* keyspace, const SharedRefPtr& buffer, const Row* row) @@ -1547,16 +1538,16 @@ FunctionMetadata::FunctionMetadata(const MetadataConfig& config, value2->primary_value_type() == CASS_VALUE_TYPE_VARCHAR) { CollectionIterator iterator1(value1); CollectionIterator iterator2(value2); - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { + if (cassandra_version >= VersionNumber(3, 0, 0)) { while (iterator1.next() && iterator2.next()) { StringRef arg_name(iterator1.value()->to_string_ref()); - DataType::ConstPtr arg_type(DataTypeCqlNameParser::parse(iterator2.value()->to_string(), config.native_types, keyspace)); + DataType::ConstPtr arg_type(DataTypeCqlNameParser::parse(iterator2.value()->to_string(), native_types, keyspace)); args_.push_back(Argument(arg_name, arg_type)); } } else { while (iterator1.next() && iterator2.next()) { StringRef arg_name(iterator1.value()->to_string_ref()); - DataType::ConstPtr arg_type(DataTypeClassNameParser::parse_one(iterator2.value()->to_string(), config.native_types)); + DataType::ConstPtr arg_type(DataTypeClassNameParser::parse_one(iterator2.value()->to_string(), native_types)); args_.push_back(Argument(arg_name, arg_type)); } } @@ -1565,10 +1556,10 @@ FunctionMetadata::FunctionMetadata(const MetadataConfig& config, value1 = add_field(buffer, row, "return_type"); if (value1 != NULL && value1->value_type() == CASS_VALUE_TYPE_VARCHAR) { - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { - return_type_ = DataTypeCqlNameParser::parse(value1->to_string(), config.native_types, keyspace); + if (cassandra_version >= VersionNumber(3, 0, 0)) { + return_type_ = DataTypeCqlNameParser::parse(value1->to_string(), native_types, keyspace); } else { - return_type_ = DataTypeClassNameParser::parse_one(value1->to_string(), config.native_types); + return_type_ = DataTypeClassNameParser::parse_one(value1->to_string(), native_types); } } @@ -1597,7 +1588,7 @@ const DataType* FunctionMetadata::get_arg_type(StringRef name) const { return i->type.get(); } -AggregateMetadata::AggregateMetadata(const MetadataConfig& config, +AggregateMetadata::AggregateMetadata(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, const std::string& name, const Value* signature, KeyspaceMetadata* keyspace, const SharedRefPtr& buffer, const Row* row) @@ -1614,13 +1605,13 @@ AggregateMetadata::AggregateMetadata(const MetadataConfig& config, value->value_type() == CASS_VALUE_TYPE_LIST && value->primary_value_type() == CASS_VALUE_TYPE_VARCHAR) { CollectionIterator iterator(value); - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { + if (cassandra_version >= VersionNumber(3, 0, 0)) { while (iterator.next()) { - arg_types_.push_back(DataTypeCqlNameParser::parse(iterator.value()->to_string(), config.native_types, keyspace)); + 
arg_types_.push_back(DataTypeCqlNameParser::parse(iterator.value()->to_string(), native_types, keyspace)); } } else { while (iterator.next()) { - arg_types_.push_back(DataTypeClassNameParser::parse_one(iterator.value()->to_string(), config.native_types)); + arg_types_.push_back(DataTypeClassNameParser::parse_one(iterator.value()->to_string(), native_types)); } } } @@ -1628,20 +1619,20 @@ AggregateMetadata::AggregateMetadata(const MetadataConfig& config, value = add_field(buffer, row, "return_type"); if (value != NULL && value->value_type() == CASS_VALUE_TYPE_VARCHAR) { - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { - return_type_ = DataTypeCqlNameParser::parse(value->to_string(), config.native_types, keyspace); + if (cassandra_version >= VersionNumber(3, 0, 0)) { + return_type_ = DataTypeCqlNameParser::parse(value->to_string(), native_types, keyspace); } else { - return_type_ = DataTypeClassNameParser::parse_one(value->to_string(), config.native_types); + return_type_ = DataTypeClassNameParser::parse_one(value->to_string(), native_types); } } value = add_field(buffer, row, "state_type"); if (value != NULL && value->value_type() == CASS_VALUE_TYPE_VARCHAR) { - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { - state_type_ = DataTypeCqlNameParser::parse(value->to_string(), config.native_types, keyspace); + if (cassandra_version >= VersionNumber(3, 0, 0)) { + state_type_ = DataTypeCqlNameParser::parse(value->to_string(), native_types, keyspace); } else { - state_type_ = DataTypeClassNameParser::parse_one(value->to_string(), config.native_types); + state_type_ = DataTypeClassNameParser::parse_one(value->to_string(), native_types); } } @@ -1672,11 +1663,11 @@ AggregateMetadata::AggregateMetadata(const MetadataConfig& config, value = add_field(buffer, row, "initcond"); if (value != NULL) { if (value->value_type() == CASS_VALUE_TYPE_BLOB) { - init_cond_ = Value(config.protocol_version, state_type_, value->data(), value->size()); - } else if (config.cassandra_version >= VersionNumber(3, 0, 0) && + init_cond_ = Value(protocol_version, state_type_, value->data(), value->size()); + } else if (cassandra_version >= VersionNumber(3, 0, 0) && value->value_type() == CASS_VALUE_TYPE_VARCHAR) { - init_cond_ = Value(config.protocol_version, - config.native_types.by_cql_name("varchar"), + init_cond_ = Value(protocol_version, + native_types.by_cql_name("varchar"), value->data(), value->size()); } } @@ -1715,7 +1706,7 @@ void IndexMetadata::update(StringRef kind, const Value* options) { options_ = *options; } -IndexMetadata::Ptr IndexMetadata::from_legacy(const MetadataConfig& config, +IndexMetadata::Ptr IndexMetadata::from_legacy(int protocol_version, const std::string& index_name, const ColumnMetadata* column, const SharedRefPtr& buffer, const Row* row) { IndexMetadata::Ptr index(new IndexMetadata(index_name)); @@ -1729,7 +1720,7 @@ IndexMetadata::Ptr IndexMetadata::from_legacy(const MetadataConfig& config, index_type = value->to_string_ref(); } - const Value* options = index->add_json_map_field(config.protocol_version, row, "index_options"); + const Value* options = index->add_json_map_field(protocol_version, row, "index_options"); index->update_legacy(index_type, column, options); return index; @@ -1777,7 +1768,7 @@ CassIndexType IndexMetadata::index_type_from_string(StringRef index_type) { return CASS_INDEX_TYPE_UNKNOWN; } -ColumnMetadata::ColumnMetadata(const MetadataConfig& config, +ColumnMetadata::ColumnMetadata(int protocol_version, const VersionNumber& cassandra_version, const 
NativeDataTypes native_types, const std::string& name, KeyspaceMetadata* keyspace, const SharedRefPtr& buffer, const Row* row) @@ -1788,10 +1779,10 @@ ColumnMetadata::ColumnMetadata(const MetadataConfig& config, const Value* value; add_field(buffer, row, "keyspace_name"); - add_field(buffer, row, table_column_name(config.cassandra_version)); + add_field(buffer, row, table_column_name(cassandra_version)); add_field(buffer, row, "column_name"); - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { + if (cassandra_version >= VersionNumber(3, 0, 0)) { value = add_field(buffer, row, "clustering_order"); if (value != NULL && value->value_type() == CASS_VALUE_TYPE_VARCHAR && @@ -1827,7 +1818,7 @@ ColumnMetadata::ColumnMetadata(const MetadataConfig& config, if (value != NULL && value->value_type() == CASS_VALUE_TYPE_VARCHAR) { std::string type(value->to_string()); - data_type_ = DataTypeCqlNameParser::parse(type, config.native_types, keyspace); + data_type_ = DataTypeCqlNameParser::parse(type, native_types, keyspace); } } else { value = add_field(buffer, row, "type"); @@ -1859,20 +1850,19 @@ ColumnMetadata::ColumnMetadata(const MetadataConfig& config, if (value != NULL && value->value_type() == CASS_VALUE_TYPE_VARCHAR) { std::string validator(value->to_string()); - data_type_ = DataTypeClassNameParser::parse_one(validator, config.native_types); + data_type_ = DataTypeClassNameParser::parse_one(validator, native_types); is_reversed_ = DataTypeClassNameParser::is_reversed(validator); } add_field(buffer, row, "index_type"); add_field(buffer, row, "index_name"); - add_json_map_field(config.protocol_version, row, "index_options"); + add_json_map_field(protocol_version, row, "index_options"); } } -void Metadata::InternalData::update_keyspaces(const MetadataConfig& config, - ResultResponse* result, KeyspaceMetadata::Map& updates) { +void Metadata::InternalData::update_keyspaces(int protocol_version, const VersionNumber& cassandra_version, + ResultResponse* result) { SharedRefPtr buffer = result->buffer(); - result->decode_first_row(); ResultIterator rows(result); while (rows.next()) { @@ -1885,16 +1875,14 @@ void Metadata::InternalData::update_keyspaces(const MetadataConfig& config, } KeyspaceMetadata* keyspace = get_or_create_keyspace(keyspace_name); - keyspace->update(config, buffer, row); - updates.insert(std::make_pair(keyspace_name, *keyspace)); + keyspace->update(protocol_version, cassandra_version, buffer, row); } } -void Metadata::InternalData::update_tables(const MetadataConfig& config, +void Metadata::InternalData::update_tables(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { SharedRefPtr buffer = result->buffer(); - result->decode_first_row(); ResultIterator rows(result); std::string keyspace_name; @@ -1906,8 +1894,8 @@ void Metadata::InternalData::update_tables(const MetadataConfig& config, const Row* row = rows.row(); if (!row->get_string_by_name("keyspace_name", &temp_keyspace_name) || - !row->get_string_by_name(table_column_name(config.cassandra_version), &table_name)) { - LOG_ERROR("Unable to get column value for 'keyspace_name' or '%s'", table_column_name(config.cassandra_version)); + !row->get_string_by_name(table_column_name(cassandra_version), &table_name)) { + LOG_ERROR("Unable to get column value for 'keyspace_name' or '%s'", table_column_name(cassandra_version)); continue; } @@ -1916,15 +1904,14 @@ void Metadata::InternalData::update_tables(const MetadataConfig& config, keyspace = get_or_create_keyspace(keyspace_name); } - 
keyspace->add_table(TableMetadata::Ptr(new TableMetadata(config, table_name, buffer, row))); + keyspace->add_table(TableMetadata::Ptr(new TableMetadata(protocol_version, cassandra_version, table_name, buffer, row))); } } -void Metadata::InternalData::update_views(const MetadataConfig& config, +void Metadata::InternalData::update_views(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { SharedRefPtr buffer = result->buffer(); - result->decode_first_row(); ResultIterator rows(result); std::string keyspace_name; @@ -1960,7 +1947,7 @@ void Metadata::InternalData::update_views(const MetadataConfig& config, continue; } - ViewMetadata::Ptr view(new ViewMetadata(config, table.get(), view_name, buffer, row)); + ViewMetadata::Ptr view(new ViewMetadata(protocol_version, cassandra_version, table.get(), view_name, buffer, row)); keyspace->add_view(view); table->add_view(view); updated_tables.push_back(table); @@ -1972,8 +1959,7 @@ void Metadata::InternalData::update_views(const MetadataConfig& config, } } -void Metadata::InternalData::update_user_types(const MetadataConfig& config, ResultResponse* result) { - result->decode_first_row(); +void Metadata::InternalData::update_user_types(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, ResultResponse* result) { ResultIterator rows(result); std::string keyspace_name; @@ -2034,10 +2020,10 @@ void Metadata::InternalData::update_user_types(const MetadataConfig& config, Res DataType::ConstPtr data_type; - if (config.cassandra_version >= VersionNumber(3, 0, 0)) { - data_type = DataTypeCqlNameParser::parse(type->to_string(), config.native_types, keyspace); + if (cassandra_version >= VersionNumber(3, 0, 0)) { + data_type = DataTypeCqlNameParser::parse(type->to_string(), native_types, keyspace); } else { - data_type = DataTypeClassNameParser::parse_one(type->to_string(), config.native_types); + data_type = DataTypeClassNameParser::parse_one(type->to_string(), native_types); } if (!data_type) { @@ -2055,11 +2041,10 @@ void Metadata::InternalData::update_user_types(const MetadataConfig& config, Res } } -void Metadata::InternalData::update_functions(const MetadataConfig& config, +void Metadata::InternalData::update_functions(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, ResultResponse* result) { SharedRefPtr buffer = result->buffer(); - result->decode_first_row(); ResultIterator rows(result); std::string keyspace_name; @@ -2070,7 +2055,7 @@ void Metadata::InternalData::update_functions(const MetadataConfig& config, std::string function_name; const Row* row = rows.row(); - const Value* signature = row->get_by_name(signature_column_name(config.cassandra_version)); + const Value* signature = row->get_by_name(signature_column_name(cassandra_version)); if (!row->get_string_by_name("keyspace_name", &temp_keyspace_name) || !row->get_string_by_name("function_name", &function_name) || signature == NULL) { @@ -2083,7 +2068,7 @@ void Metadata::InternalData::update_functions(const MetadataConfig& config, keyspace = get_or_create_keyspace(keyspace_name); } - keyspace->add_function(FunctionMetadata::Ptr(new FunctionMetadata(config, + keyspace->add_function(FunctionMetadata::Ptr(new FunctionMetadata(protocol_version, cassandra_version, native_types, function_name, signature, keyspace, buffer, row))); @@ -2091,10 +2076,9 @@ void Metadata::InternalData::update_functions(const MetadataConfig& config, } } -void 
Metadata::InternalData::update_aggregates(const MetadataConfig& config, ResultResponse* result) { +void Metadata::InternalData::update_aggregates(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, ResultResponse* result) { SharedRefPtr buffer = result->buffer(); - result->decode_first_row(); ResultIterator rows(result); std::string keyspace_name; @@ -2105,7 +2089,7 @@ void Metadata::InternalData::update_aggregates(const MetadataConfig& config, Res std::string aggregate_name; const Row* row = rows.row(); - const Value* signature = row->get_by_name(signature_column_name(config.cassandra_version)); + const Value* signature = row->get_by_name(signature_column_name(cassandra_version)); if (!row->get_string_by_name("keyspace_name", &temp_keyspace_name) || !row->get_string_by_name("aggregate_name", &aggregate_name) || signature == NULL) { @@ -2118,7 +2102,7 @@ void Metadata::InternalData::update_aggregates(const MetadataConfig& config, Res keyspace = get_or_create_keyspace(keyspace_name); } - keyspace->add_aggregate(AggregateMetadata::Ptr(new AggregateMetadata(config, + keyspace->add_aggregate(AggregateMetadata::Ptr(new AggregateMetadata(protocol_version, cassandra_version, native_types, aggregate_name, signature, keyspace, buffer, row))); @@ -2154,10 +2138,9 @@ void Metadata::InternalData::drop_aggregate(const std::string& keyspace_name, co i->second.drop_aggregate(full_aggregate_name); } -void Metadata::InternalData::update_columns(const MetadataConfig& config, ResultResponse* result) { +void Metadata::InternalData::update_columns(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, ResultResponse* result) { SharedRefPtr buffer = result->buffer(); - result->decode_first_row(); ResultIterator rows(result); std::string keyspace_name; @@ -2173,10 +2156,10 @@ void Metadata::InternalData::update_columns(const MetadataConfig& config, Result const Row* row = rows.row(); if (!row->get_string_by_name("keyspace_name", &temp_keyspace_name) || - !row->get_string_by_name(table_column_name(config.cassandra_version), &temp_table_or_view_name) || + !row->get_string_by_name(table_column_name(cassandra_version), &temp_table_or_view_name) || !row->get_string_by_name("column_name", &column_name)) { LOG_ERROR("Unable to get column value for 'keyspace_name', '%s' or 'column_name'", - table_column_name(config.cassandra_version)); + table_column_name(cassandra_version)); continue; } @@ -2189,7 +2172,7 @@ void Metadata::InternalData::update_columns(const MetadataConfig& config, Result if (table_or_view_name != temp_table_or_view_name) { // Build keys for the previous table if (table_or_view) { - table_or_view->build_keys_and_sort(config); + table_or_view->build_keys_and_sort(protocol_version, cassandra_version, native_types); } table_or_view_name = temp_table_or_view_name; table_or_view = TableMetadataBase::Ptr(keyspace->get_table(table_or_view_name)); @@ -2201,18 +2184,18 @@ void Metadata::InternalData::update_columns(const MetadataConfig& config, Result } if (table_or_view) { - table_or_view->add_column(ColumnMetadata::Ptr(new ColumnMetadata(config, column_name, + table_or_view->add_column(ColumnMetadata::Ptr(new ColumnMetadata(protocol_version, cassandra_version, native_types, column_name, keyspace, buffer, row))); } } // Build keys for the last table if (table_or_view) { - table_or_view->build_keys_and_sort(config); + table_or_view->build_keys_and_sort(protocol_version, cassandra_version, native_types); } } -void 
Metadata::InternalData::update_legacy_indexes(const MetadataConfig& config, ResultResponse* result) { +void Metadata::InternalData::update_legacy_indexes(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { SharedRefPtr buffer = result->buffer(); ResultIterator rows(result); @@ -2230,10 +2213,10 @@ void Metadata::InternalData::update_legacy_indexes(const MetadataConfig& config, const Row* row = rows.row(); if (!row->get_string_by_name("keyspace_name", &temp_keyspace_name) || - !row->get_string_by_name(table_column_name(config.cassandra_version), &temp_table_name) || + !row->get_string_by_name(table_column_name(cassandra_version), &temp_table_name) || !row->get_string_by_name("column_name", &column_name)) { LOG_ERROR("Unable to get column value for 'keyspace_name', '%s' or 'column_name'", - table_column_name(config.cassandra_version)); + table_column_name(cassandra_version)); continue; } @@ -2257,17 +2240,16 @@ void Metadata::InternalData::update_legacy_indexes(const MetadataConfig& config, if (index_type != NULL && index_type->value_type() == CASS_VALUE_TYPE_VARCHAR) { std::string index_name = column->get_string_field("index_name"); - table->add_index(IndexMetadata::from_legacy(config, index_name, column, buffer, row)); + table->add_index(IndexMetadata::from_legacy(protocol_version, index_name, column, buffer, row)); } } } } } -void Metadata::InternalData::update_indexes(const MetadataConfig& config, ResultResponse* result) { +void Metadata::InternalData::update_indexes(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result) { SharedRefPtr buffer = result->buffer(); - result->decode_first_row(); ResultIterator rows(result); std::string keyspace_name; diff --git a/src/metadata.hpp b/src/metadata.hpp index f96487359..5b36339c5 100644 --- a/src/metadata.hpp +++ b/src/metadata.hpp @@ -18,12 +18,12 @@ #define __CASS_SCHEMA_METADATA_HPP_INCLUDED__ #include "copy_on_write_ptr.hpp" +#include "host.hpp" #include "iterator.hpp" #include "macros.hpp" #include "ref_counted.hpp" #include "scoped_lock.hpp" #include "scoped_ptr.hpp" -#include "token_map.hpp" #include "data_type.hpp" #include "value.hpp" @@ -97,14 +97,6 @@ class VecIteratorImpl { typename Collection::const_iterator end_; }; -struct MetadataConfig { - MetadataConfig() - : protocol_version(0) { } - int protocol_version; - VersionNumber cassandra_version; - NativeDataTypes native_types; -}; - class MetadataField { public: typedef std::map Map; @@ -208,7 +200,7 @@ class FunctionMetadata : public MetadataBase, public RefCounted - FunctionMetadata(const MetadataConfig& config, + FunctionMetadata(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, const std::string& name, const Value* signature, KeyspaceMetadata* keyspace, const SharedRefPtr& buffer, const Row* row); @@ -241,7 +233,7 @@ class AggregateMetadata : public MetadataBase, public RefCounted Map; typedef std::vector Vec; - AggregateMetadata(const MetadataConfig& config, + AggregateMetadata(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, const std::string& name, const Value* signature, KeyspaceMetadata* keyspace, const SharedRefPtr& buffer, const Row* row); @@ -282,7 +274,7 @@ class IndexMetadata : public MetadataBase, public RefCounted { const SharedRefPtr& buffer, const Row* row); void update(StringRef index_type, const Value* options); - static IndexMetadata::Ptr from_legacy(const MetadataConfig& config, + static IndexMetadata::Ptr from_legacy(int protocol_version, const std::string& index_name, const ColumnMetadata* column, const SharedRefPtr& buffer, const Row* row); void update_legacy(StringRef index_type, const ColumnMetadata* column, const Value* options); @@ -324,7 +316,7 @@
class ColumnMetadata : public MetadataBase, public RefCounted { , data_type_(data_type) , is_reversed_(false) { } - ColumnMetadata(const MetadataConfig& config, + ColumnMetadata(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, const std::string& name, KeyspaceMetadata* keyspace, const SharedRefPtr& buffer, const Row* row); @@ -360,7 +352,7 @@ class TableMetadataBase : public MetadataBase, public RefCounted& buffer, const Row* row); virtual ~TableMetadataBase() { } @@ -374,7 +366,7 @@ class TableMetadataBase : public MetadataBase, public RefCounted& buffer, const Row* row); @@ -473,7 +465,7 @@ class TableMetadata : public TableMetadataBase { const IndexMetadata* index() const { return impl_.item().get(); } }; - TableMetadata(const MetadataConfig& config, const std::string& name, + TableMetadata(int protocol_version, const VersionNumber& cassandra_version, const std::string& name, const SharedRefPtr& buffer, const Row* row); const ViewMetadata::Vec& views() const { return views_; } @@ -539,7 +531,7 @@ class KeyspaceMetadata : public MetadataBase { , functions_(new FunctionMetadata::Map) , aggregates_(new AggregateMetadata::Map) { } - void update(const MetadataConfig& config, + void update(int protocol_version, const VersionNumber& cassandra_version, const SharedRefPtr& buffer, const Row* row); const FunctionMetadata::Map& functions() const { return *functions_; } @@ -636,16 +628,16 @@ class Metadata { uv_mutex_destroy(&mutex_); } - SchemaSnapshot schema_snapshot() const; + SchemaSnapshot schema_snapshot(int protocol_version, const VersionNumber& cassandra_version) const; - void update_keyspaces(ResultResponse* result); - void update_tables(ResultResponse* result); - void update_views(ResultResponse* result); - void update_columns(ResultResponse* result); - void update_indexes(ResultResponse* result); - void update_user_types(ResultResponse* result); - void update_functions(ResultResponse* result); - void update_aggregates(ResultResponse* result); + void update_keyspaces(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_tables(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_views(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_columns(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_indexes(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_user_types(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_functions(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_aggregates(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); void drop_keyspace(const std::string& keyspace_name); void drop_table_or_view(const std::string& keyspace_name, const std::string& table_or_view_name); @@ -655,7 +647,7 @@ class Metadata { // This clears and allows updates to the back buffer while preserving // the front buffer for snapshots. - void clear_and_update_back(); + void clear_and_update_back(const VersionNumber& cassandra_version); // This swaps the back buffer to the front and makes incremental updates // happen directly to the front buffer. 
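// The two buffer comments above describe a double-buffering scheme: a full
// schema refresh is staged in the back buffer while readers keep taking
// snapshots of the front buffer, and the swap then publishes the staged data
// under the snapshot mutex. A minimal sketch of the idea follows; it is
// illustrative only, using a plain std::map as a stand-in for the driver's
// copy-on-write keyspace map and a hypothetical swap_to_front() for the swap
// method, whose real name is not shown in this hunk.

#include <map>
#include <string>
#include <uv.h>

typedef std::map<std::string, std::string> KeyspaceMap; // simplified stand-in

class DoubleBufferedSchema {
public:
  DoubleBufferedSchema() : updating_(&front_) { uv_mutex_init(&mutex_); }
  ~DoubleBufferedSchema() { uv_mutex_destroy(&mutex_); }

  // Snapshots copy the front buffer under the lock, so a snapshot can
  // never observe a half-applied refresh.
  KeyspaceMap snapshot() {
    uv_mutex_lock(&mutex_);
    KeyspaceMap copy = front_;
    uv_mutex_unlock(&mutex_);
    return copy;
  }

  // Stage a full refresh in the back buffer; snapshot readers keep using
  // the front buffer while the refresh is in flight. As with the driver's
  // metadata, updates are assumed to happen on a single thread.
  void clear_and_update_back() {
    back_.clear();
    updating_ = &back_;
  }

  // Publish the staged refresh; afterwards incremental updates are applied
  // directly to the front buffer again.
  void swap_to_front() {
    uv_mutex_lock(&mutex_);
    front_.swap(back_);
    updating_ = &front_;
    uv_mutex_unlock(&mutex_);
  }

  // Both staged and incremental updates go through updating_.
  void put(const std::string& keyspace, const std::string& meta) {
    uv_mutex_lock(&mutex_);
    (*updating_)[keyspace] = meta;
    uv_mutex_unlock(&mutex_);
  }

private:
  uv_mutex_t mutex_;
  KeyspaceMap front_;     // read by snapshots
  KeyspaceMap back_;      // staging area for a full refresh
  KeyspaceMap* updating_; // where updates currently land
};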
@@ -663,22 +655,6 @@ class Metadata { void clear(); - void set_protocol_version(int version) { - config_.protocol_version = version; - } - - const VersionNumber& cassandra_version() const { return config_.cassandra_version; } - void set_cassandra_version(const VersionNumber& cassandra_version) { - config_.cassandra_version = cassandra_version; - } - - void set_partitioner(const std::string& partitioner_class) { token_map_.set_partitioner(partitioner_class); } - void update_host(SharedRefPtr& host, const TokenStringList& tokens) { token_map_.update_host(host, tokens); } - void build() { token_map_.build(); } - void remove_host(SharedRefPtr& host) { token_map_.remove_host(host); } - - const TokenMap& token_map() const { return token_map_; } - private: bool is_front_buffer() const { return updating_ == &front_; } @@ -690,15 +666,15 @@ class Metadata { const KeyspaceMetadata::MapPtr& keyspaces() const { return keyspaces_; } - void update_keyspaces(const MetadataConfig& config, ResultResponse* result, KeyspaceMetadata::Map& updates); - void update_tables(const MetadataConfig& config, ResultResponse* result); - void update_views(const MetadataConfig& config, ResultResponse* result); - void update_columns(const MetadataConfig& config, ResultResponse* result); - void update_legacy_indexes(const MetadataConfig& config, ResultResponse* result); - void update_indexes(const MetadataConfig& config, ResultResponse* result); - void update_user_types(const MetadataConfig& config, ResultResponse* result); - void update_functions(const MetadataConfig& config, ResultResponse* result); - void update_aggregates(const MetadataConfig& config, ResultResponse* result); + void update_keyspaces(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_tables(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_views(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_columns(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, ResultResponse* result); + void update_legacy_indexes(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_indexes(int protocol_version, const VersionNumber& cassandra_version, ResultResponse* result); + void update_user_types(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, ResultResponse* result); + void update_functions(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, ResultResponse* result); + void update_aggregates(int protocol_version, const VersionNumber& cassandra_version, const NativeDataTypes native_types, ResultResponse* result); void drop_keyspace(const std::string& keyspace_name); void drop_table_or_view(const std::string& keyspace_name, const std::string& table_or_view_name); @@ -733,14 +709,9 @@ class Metadata { // This lock prevents partial snapshots when updating metadata mutable uv_mutex_t mutex_; - // Only used internally on a single thread so it doesn't currently use - // copy-on-write. When this is exposed externally it needs to be - // moved into the InternalData class and made to use copy-on-write. - TokenMap token_map_; - // Only used internally on a single thread, there's // no need for copy-on-write. 
- MetadataConfig config_; + NativeDataTypes native_types_; private: DISALLOW_COPY_AND_ASSIGN(Metadata); diff --git a/src/pool.cpp b/src/pool.cpp index 348ec8713..205aa41b5 100644 --- a/src/pool.cpp +++ b/src/pool.cpp @@ -27,6 +27,8 @@ #include "result_response.hpp" #include "timer.hpp" +#include + namespace cass { static bool least_busy_comp(Connection* a, Connection* b) { @@ -114,8 +116,8 @@ void Pool::close(bool cancel_reconnect) { it != end; ++it) { (*it)->close(); } - for (ConnectionSet::iterator it = connections_pending_.begin(), - end = connections_pending_.end(); + for (ConnectionVec::iterator it = pending_connections_.begin(), + end = pending_connections_.end(); it != end; ++it) { (*it)->close(); } @@ -231,7 +233,7 @@ void Pool::flush() { void Pool::maybe_notify_ready() { // This will notify ready even if all the connections fail. // it is up to the holder to inspect state - if (state_ == POOL_STATE_CONNECTING && connections_pending_.empty()) { + if (state_ == POOL_STATE_CONNECTING && pending_connections_.empty()) { LOG_DEBUG("Pool(%p) connected to host %s", static_cast(this), host_->address_string().c_str()); @@ -242,7 +244,7 @@ void Pool::maybe_notify_ready() { void Pool::maybe_close() { if (state_ == POOL_STATE_CLOSING && connections_.empty() && - connections_pending_.empty()) { + pending_connections_.empty()) { LOG_DEBUG("Pool(%p) closed connections to host %s", static_cast(this), @@ -266,16 +268,16 @@ void Pool::spawn_connection() { static_cast(this)); connection->connect(); - connections_pending_.insert(connection); + pending_connections_.push_back(connection); } } void Pool::maybe_spawn_connection() { - if (connections_pending_.size() >= config_.max_concurrent_creation()) { + if (pending_connections_.size() >= config_.max_concurrent_creation()) { return; } - if (connections_.size() + connections_pending_.size() >= + if (connections_.size() + pending_connections_.size() >= config_.max_connections_per_host()) { return; } @@ -297,7 +299,8 @@ Connection* Pool::find_least_busy() { } void Pool::on_ready(Connection* connection) { - connections_pending_.erase(connection); + pending_connections_.erase(std::remove(pending_connections_.begin(), pending_connections_.end(), connection), + pending_connections_.end()); connections_.push_back(connection); return_connection(connection); @@ -307,7 +310,8 @@ void Pool::on_ready(Connection* connection) { } void Pool::on_close(Connection* connection) { - connections_pending_.erase(connection); + pending_connections_.erase(std::remove(pending_connections_.begin(), pending_connections_.end(), connection), + pending_connections_.end()); ConnectionVec::iterator it = std::find(connections_.begin(), connections_.end(), connection); @@ -402,7 +406,7 @@ void Pool::on_partial_reconnect(Timer* timer) { Pool* pool = static_cast(timer->data()); size_t current = pool->connections_.size() + - pool->connections_pending_.size(); + pool->pending_connections_.size(); size_t want = pool->config_.core_connections_per_host(); diff --git a/src/pool.hpp b/src/pool.hpp index 6ee03fb10..5aee1fd3a 100644 --- a/src/pool.hpp +++ b/src/pool.hpp @@ -27,11 +27,6 @@ #include "scoped_ptr.hpp" #include "timer.hpp" -#include -#include -#include -#include - namespace cass { class IOWorker; @@ -106,7 +101,6 @@ class Pool : public RefCounted Connection* find_least_busy(); private: - typedef std::set ConnectionSet; typedef std::vector ConnectionVec; IOWorker* io_worker_; @@ -118,7 +112,7 @@ class Pool : public RefCounted PoolState state_; Connection::ConnectionError error_code_; 
ConnectionVec connections_; - ConnectionSet connections_pending_; + ConnectionVec pending_connections_; List pending_requests_; int available_connection_count_; bool is_available_; diff --git a/src/ref_counted.hpp b/src/ref_counted.hpp index bf58998f2..08330bbbf 100644 --- a/src/ref_counted.hpp +++ b/src/ref_counted.hpp @@ -125,11 +125,11 @@ class SharedRefPtr { } } - bool operator==(const T* ptr) { + bool operator==(const T* ptr) const { return ptr_ == ptr; } - bool operator==(const SharedRefPtr& ref) { + bool operator==(const SharedRefPtr& ref) const { return ptr_ == ref.ptr_; } diff --git a/src/replication_strategy.cpp b/src/replication_strategy.cpp deleted file mode 100644 index 85b5f3953..000000000 --- a/src/replication_strategy.cpp +++ /dev/null @@ -1,203 +0,0 @@ -/* - Copyright (c) 2014-2016 DataStax - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "replication_strategy.hpp" - -#include "logger.hpp" -#include "map_iterator.hpp" -#include "metadata.hpp" -#include "token_map.hpp" -#include "utils.hpp" - -#include -#include - -namespace cass { - -static void build_dc_replicas(const KeyspaceMetadata& ks_meta, - NetworkTopologyStrategy::DCReplicaCountMap* output) { - const Value* strategy_options = ks_meta.strategy_options(); - if (strategy_options->is_map()) { - MapIterator iterator(strategy_options); - while (iterator.next()) { - if (iterator.key()->to_string_ref() != "class") { - size_t replication_factor = strtoul(iterator.value()->to_string().c_str(), NULL, 10); - if (replication_factor > 0) { - (*output)[iterator.key()->to_string()] = replication_factor; - } - } - } - } -} - -static size_t get_replication_factor(const KeyspaceMetadata& ks_meta) { - size_t replication_factor = 0; - const Value* strategy_options = ks_meta.strategy_options(); - if (strategy_options->is_map()) { - MapIterator iterator(strategy_options); - while (iterator.next()) { - if (iterator.key()->to_string_ref() == "replication_factor") { - replication_factor = strtoul(iterator.value()->to_string().c_str(), NULL, 10); - } - } - } - if (replication_factor == 0) { - LOG_WARN("Replication factor of 0"); - } - return replication_factor; -} - - -SharedRefPtr ReplicationStrategy::from_keyspace_meta(const KeyspaceMetadata& ks_meta) { - StringRef strategy_class = ks_meta.strategy_class(); - - SharedRefPtr strategy; - if (ends_with(strategy_class, NetworkTopologyStrategy::STRATEGY_CLASS)) { - NetworkTopologyStrategy::DCReplicaCountMap replication_factors; - build_dc_replicas(ks_meta, &replication_factors); - return SharedRefPtr(new NetworkTopologyStrategy(strategy_class.to_string(), - replication_factors)); - } else if (ends_with(strategy_class, SimpleStrategy::STRATEGY_CLASS)) { - size_t replication_factor = get_replication_factor(ks_meta); - return SharedRefPtr(new SimpleStrategy(strategy_class.to_string(), replication_factor)); - } else { - return SharedRefPtr(new NonReplicatedStrategy(strategy_class.to_string())); - } -} - -const std::string NetworkTopologyStrategy::STRATEGY_CLASS("NetworkTopologyStrategy"); - -bool 
NetworkTopologyStrategy::equal(const KeyspaceMetadata& ks_meta) { - if (ks_meta.strategy_class() != strategy_class_) return false; - DCReplicaCountMap temp_rfs; - build_dc_replicas(ks_meta, &temp_rfs); - return replication_factors_ == temp_rfs; -} - -typedef std::map > DCRackMap; -static DCRackMap racks_in_dcs(const TokenHostMap& token_hosts) { - DCRackMap racks; - for (TokenHostMap::const_iterator i = token_hosts.begin(); - i != token_hosts.end(); ++i) { - const std::string& dc = i->second->dc(); - const std::string& rack = i->second->rack(); - if (!dc.empty() && !rack.empty()) { - racks[dc].insert(rack); - } - } - return racks; -} - -void NetworkTopologyStrategy::tokens_to_replicas(const TokenHostMap& primary, TokenReplicaMap* output) const { - DCRackMap racks = racks_in_dcs(primary); - - output->clear(); - - for (TokenHostMap::const_iterator i = primary.begin(); i != primary.end(); ++i) { - DCReplicaCountMap replica_counts; - std::map > racks_observed; - std::map > > skipped_endpoints; - - CopyOnWriteHostVec replicas(new HostVec()); - TokenHostMap::const_iterator j = i; - for (size_t count = 0; count < primary.size() && replica_counts != replication_factors_; ++count) { - const SharedRefPtr& host = j->second; - const std::string& dc = host->dc(); - - ++j; - if (j == primary.end()) { - j = primary.begin(); - } - - DCReplicaCountMap::const_iterator rf_it = replication_factors_.find(dc); - if (dc.empty() || rf_it == replication_factors_.end()) { - continue; - } - - const size_t rf = rf_it->second; - size_t& replica_count_this_dc = replica_counts[dc]; - if (replica_count_this_dc >= rf) { - continue; - } - - const size_t rack_count_this_dc = racks[dc].size(); - std::set& racks_observed_this_dc = racks_observed[dc]; - const std::string& rack = host->rack(); - - if (rack.empty() || racks_observed_this_dc.size() == rack_count_this_dc) { - ++replica_count_this_dc; - replicas->push_back(host); - } else { - if (racks_observed_this_dc.count(rack) > 0) { - skipped_endpoints[dc].push_back(host); - } else { - ++replica_count_this_dc; - replicas->push_back(host); - racks_observed_this_dc.insert(rack); - - if (racks_observed_this_dc.size() == rack_count_this_dc) { - std::list >& skipped_endpoints_this_dc = skipped_endpoints[dc]; - while (!skipped_endpoints_this_dc.empty() && replica_count_this_dc < rf) { - ++replica_count_this_dc; - replicas->push_back(skipped_endpoints_this_dc.front()); - skipped_endpoints_this_dc.pop_front(); - } - } - } - } - } - - output->insert(std::make_pair(i->first, replicas)); - } -} - -const std::string SimpleStrategy::STRATEGY_CLASS("SimpleStrategy"); - -bool SimpleStrategy::equal(const KeyspaceMetadata& ks_meta) { - if (ks_meta.strategy_class() != strategy_class_) return false; - return replication_factor_ == get_replication_factor(ks_meta); -} - -void SimpleStrategy::tokens_to_replicas(const TokenHostMap& primary, TokenReplicaMap* output) const { - size_t target_replicas = std::min(replication_factor_, primary.size()); - output->clear(); - for (TokenHostMap::const_iterator i = primary.begin(); i != primary.end(); ++i) { - CopyOnWriteHostVec token_replicas(new HostVec()); - TokenHostMap::const_iterator j = i; - do { - token_replicas->push_back(j->second); - ++j; - if (j == primary.end()) { - j = primary.begin(); - } - } while (token_replicas->size() < target_replicas); - output->insert(std::make_pair(i->first, token_replicas)); - } -} - -bool NonReplicatedStrategy::equal(const KeyspaceMetadata& ks_meta) { - return ks_meta.strategy_class() == strategy_class_; -} - -void 
NonReplicatedStrategy::tokens_to_replicas(const TokenHostMap& primary, TokenReplicaMap* output) const { - output->clear(); - for (TokenHostMap::const_iterator i = primary.begin(); i != primary.end(); ++i) { - CopyOnWriteHostVec token_replicas(new HostVec(1, i->second)); - output->insert(std::make_pair(i->first, token_replicas)); - } -} - -} diff --git a/src/replication_strategy.hpp b/src/replication_strategy.hpp deleted file mode 100644 index cddac3562..000000000 --- a/src/replication_strategy.hpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - Copyright (c) 2014-2016 DataStax - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __CASS_REPLICATION_STRATEGY_HPP_INCLUDED__ -#define __CASS_REPLICATION_STRATEGY_HPP_INCLUDED__ - -#include "buffer.hpp" -#include "host.hpp" -#include "ref_counted.hpp" - -#include - -namespace cass { - -class KeyspaceMetadata; -class Value; - -typedef std::vector Token; -typedef std::map > TokenHostMap; -typedef std::map TokenReplicaMap; - -class ReplicationStrategy : public RefCounted { -public: - static SharedRefPtr from_keyspace_meta(const KeyspaceMetadata& ks_meta); - - ReplicationStrategy(const std::string& strategy_class) - : strategy_class_(strategy_class) { } - - virtual ~ReplicationStrategy() { } - virtual bool equal(const KeyspaceMetadata& ks_meta) = 0; - virtual void tokens_to_replicas(const TokenHostMap& primary, TokenReplicaMap* output) const = 0; - -protected: - std::string strategy_class_; -}; - - -class NetworkTopologyStrategy : public ReplicationStrategy { -public: - typedef std::map DCReplicaCountMap; - - static const std::string STRATEGY_CLASS; - - NetworkTopologyStrategy(const std::string& strategy_class, - const DCReplicaCountMap& replication_factors) - : ReplicationStrategy(strategy_class) - , replication_factors_(replication_factors) { } - - virtual ~NetworkTopologyStrategy() { } - - virtual bool equal(const KeyspaceMetadata& ks_meta); - virtual void tokens_to_replicas(const TokenHostMap& primary, TokenReplicaMap* output) const; - -private: - DCReplicaCountMap replication_factors_; -}; - - -class SimpleStrategy : public ReplicationStrategy { -public: - static const std::string STRATEGY_CLASS; - - SimpleStrategy(const std::string& strategy_class, - size_t replication_factor) - : ReplicationStrategy(strategy_class) - , replication_factor_(replication_factor) { } - - virtual ~SimpleStrategy() { } - - virtual bool equal(const KeyspaceMetadata& ks_meta); - virtual void tokens_to_replicas(const TokenHostMap& primary, TokenReplicaMap* output) const; - -private: - size_t replication_factor_; -}; - - -class NonReplicatedStrategy : public ReplicationStrategy { -public: - NonReplicatedStrategy(const std::string& strategy_class) - : ReplicationStrategy(strategy_class) { } - virtual ~NonReplicatedStrategy() { } - - virtual bool equal(const KeyspaceMetadata& ks_meta); - virtual void tokens_to_replicas(const TokenHostMap& primary, TokenReplicaMap* output) const; -}; - -} // namespace cass - -#endif diff --git a/src/request_handler.hpp b/src/request_handler.hpp index 
52fe9bfeb..d63bf65dc 100644 --- a/src/request_handler.hpp +++ b/src/request_handler.hpp @@ -41,9 +41,9 @@ class Timer; class ResponseFuture : public Future { public: - ResponseFuture(const Metadata& metadata) + ResponseFuture(int protocol_version, const VersionNumber& cassandra_version, const Metadata& metadata) : Future(CASS_FUTURE_TYPE_RESPONSE) - , schema_metadata(metadata.schema_snapshot()) { } + , schema_metadata(metadata.schema_snapshot(protocol_version, cassandra_version)) { } void set_response(Address address, const SharedRefPtr& response) { ScopedMutex lock(&mutex_); diff --git a/src/result_response.cpp b/src/result_response.cpp index 8c0fbc4cd..0670ff44a 100644 --- a/src/result_response.cpp +++ b/src/result_response.cpp @@ -288,7 +288,9 @@ char* ResultResponse::decode_metadata(char* input, SharedRefPtr* } void ResultResponse::decode_first_row() { - if (row_count_ > 0) { + if (row_count_ > 0 && + metadata_ && // Valid metadata required for column count + first_row_.values.empty()) { // Only decode the first row once first_row_.values.reserve(column_count()); rows_ = decode_row(rows_, this, first_row_.values); } @@ -297,6 +299,7 @@ void ResultResponse::decode_first_row() { bool ResultResponse::decode_rows(char* input) { char* buffer = decode_metadata(input, &metadata_); rows_ = decode_int32(buffer, row_count_); + decode_first_row(); return true; } diff --git a/src/result_response.hpp b/src/result_response.hpp index 809ef5463..0a174da82 100644 --- a/src/result_response.hpp +++ b/src/result_response.hpp @@ -61,6 +61,7 @@ class ResultResponse : public Response { void set_metadata(ResultMetadata* metadata) { metadata_.reset(metadata); + decode_first_row(); } const SharedRefPtr& result_metadata() const { return result_metadata_; } @@ -80,12 +81,12 @@ class ResultResponse : public Response { bool decode(int version, char* input, size_t size); - void decode_first_row(); - private: char* decode_metadata(char* input, SharedRefPtr* metadata, bool has_pk_indices = false); + void decode_first_row(); + bool decode_rows(char* input); bool decode_set_keyspace(char* input); diff --git a/src/round_robin_policy.hpp b/src/round_robin_policy.hpp index 96600e39e..b6dec324b 100644 --- a/src/round_robin_policy.hpp +++ b/src/round_robin_policy.hpp @@ -42,7 +42,7 @@ class RoundRobinPolicy : public LoadBalancingPolicy { virtual QueryPlan* new_query_plan(const std::string& connected_keyspace, const Request* request, - const TokenMap& token_map, + const TokenMap* token_map, Request::EncodingCache* cache) { return new RoundRobinQueryPlan(hosts_, index_++); } diff --git a/src/schema_change_handler.cpp b/src/schema_change_handler.cpp index b8adc4981..37caa27af 100644 --- a/src/schema_change_handler.cpp +++ b/src/schema_change_handler.cpp @@ -56,8 +56,6 @@ bool SchemaChangeHandler::has_schema_agreement(const ResponseMap& responses) { if (MultipleRequestHandler::get_result_response(responses, "local", &local_result) && local_result->row_count() > 0) { - local_result->decode_first_row(); - const Row* row = &local_result->first_row(); const Value* v = row->get_by_name("schema_version"); @@ -71,8 +69,6 @@ bool SchemaChangeHandler::has_schema_agreement(const ResponseMap& responses) { ResultResponse* peers_result; if (MultipleRequestHandler::get_result_response(responses, "peers", &peers_result)) { - peers_result->decode_first_row(); - ResultIterator rows(peers_result); while (rows.next()) { const Row* row = rows.row(); diff --git a/src/serialization.hpp b/src/serialization.hpp index 74ce0afaa..3f6ce1f7c 100644 --- 
a/src/serialization.hpp +++ b/src/serialization.hpp @@ -124,16 +124,16 @@ inline void encode_int64(char* output, cass_int64_t value) { output[7] = static_cast<char>(value >> 0); } -inline void encode_uint64(uint8_t* output, uint64_t value) { +inline void encode_uint64(char* output, uint64_t value) { STATIC_ASSERT(sizeof(cass_uint64_t) == 8); - output[0] = static_cast<uint8_t>(value >> 56); - output[1] = static_cast<uint8_t>(value >> 48); - output[2] = static_cast<uint8_t>(value >> 40); - output[3] = static_cast<uint8_t>(value >> 32); - output[4] = static_cast<uint8_t>(value >> 24); - output[5] = static_cast<uint8_t>(value >> 16); - output[6] = static_cast<uint8_t>(value >> 8); - output[7] = static_cast<uint8_t>(value >> 0); + output[0] = static_cast<char>(static_cast<uint8_t>(value >> 56)); + output[1] = static_cast<char>(static_cast<uint8_t>(value >> 48)); + output[2] = static_cast<char>(static_cast<uint8_t>(value >> 40)); + output[3] = static_cast<char>(static_cast<uint8_t>(value >> 32)); + output[4] = static_cast<char>(static_cast<uint8_t>(value >> 24)); + output[5] = static_cast<char>(static_cast<uint8_t>(value >> 16)); + output[6] = static_cast<char>(static_cast<uint8_t>(value >> 8)); + output[7] = static_cast<char>(static_cast<uint8_t>(value >> 0)); } inline char* decode_int64(char* input, cass_int64_t& output) { diff --git a/src/session.cpp b/src/session.cpp index 90e14543d..68e21026a 100644 --- a/src/session.cpp +++ b/src/session.cpp @@ -92,7 +92,7 @@ CassFuture* cass_session_execute_batch(CassSession* session, const CassBatch* ba } const CassSchemaMeta* cass_session_get_schema_meta(const CassSession* session) { - return CassSchemaMeta::to(new cass::Metadata::SchemaSnapshot(session->metadata().schema_snapshot())); + return CassSchemaMeta::to(new cass::Metadata::SchemaSnapshot(session->metadata().schema_snapshot(session->protocol_version(), session->cassandra_version()))); } void cass_session_get_metrics(const CassSession* session, @@ -572,7 +572,7 @@ Future* Session::prepare(const char* statement, size_t length) { PrepareRequest* prepare = new PrepareRequest(); prepare->set_query(statement, length); - ResponseFuture* future = new ResponseFuture(metadata_); + ResponseFuture* future = new ResponseFuture(protocol_version(), cassandra_version(), metadata_); future->inc_ref(); // External reference future->statement.assign(statement, length); @@ -662,7 +662,7 @@ void Session::on_down(SharedRefPtr host) { } Future* Session::execute(const RoutableRequest* request) { - ResponseFuture* future = new ResponseFuture(metadata_); + ResponseFuture* future = new ResponseFuture(protocol_version(), cassandra_version(), metadata_); future->inc_ref(); // External reference RetryPolicy* retry_policy @@ -738,8 +738,7 @@ void Session::on_execute(uv_async_t* data) { QueryPlan* Session::new_query_plan(const Request* request, Request::EncodingCache* cache) { const CopyOnWritePtr keyspace(keyspace_); - return load_balancing_policy_->new_query_plan(*keyspace, request, - metadata_.token_map(), cache); + return load_balancing_policy_->new_query_plan(*keyspace, request, token_map_.get(), cache); } } // namespace cass diff --git a/src/session.hpp b/src/session.hpp index 93163003a..61372d085 100644 --- a/src/session.hpp +++ b/src/session.hpp @@ -32,6 +32,7 @@ #include "row.hpp" #include "scoped_lock.hpp" #include "scoped_ptr.hpp" +#include "token_map.hpp" #include #include @@ -107,6 +108,10 @@ class Session : public EventThread { return control_connection_.protocol_version(); } + const VersionNumber& cassandra_version() const { + return control_connection_.cassandra_version(); + } + private: void clear(const Config& config); int init(); @@ -195,7 +200,10 @@ IOWorkerVec
io_workers_; ScopedPtr > > request_queue_; + + ScopedPtr token_map_; Metadata metadata_; + ControlConnection control_connection_; bool current_host_mark_; int pending_pool_count_; diff --git a/src/stream_manager.hpp b/src/stream_manager.hpp index d35108f19..e2f0c245f 100644 --- a/src/stream_manager.hpp +++ b/src/stream_manager.hpp @@ -24,11 +24,7 @@ #include #include -#ifdef CASS_USE_SPARSEHASH -#include -#else -#include -#endif +#include #if defined(_MSC_VER) #include @@ -44,12 +40,10 @@ class StreamManager { , num_words_(max_streams_ / NUM_BITS_PER_WORD) , offset_(0) , words_(new word_t[num_words_]) { -#ifdef CASS_USE_SPARSEHASH // Client request stream IDs are always positive values so it's // safe to use negative values for the empty and deleted keys. pending_.set_empty_key(-1); pending_.set_deleted_key(-2); -#endif memset(words_.get(), 0xFF, sizeof(word_t) * num_words_); } @@ -82,11 +76,7 @@ class StreamManager { size_t max_streams() const { return max_streams_; } private: -#ifdef CASS_USE_SPARSEHASH - typedef google::dense_hash_map PendingMap; -#else - typedef std::map PendingMap; -#endif + typedef sparsehash::dense_hash_map PendingMap; #if defined(_MSC_VER) && defined(_M_AMD64) typedef __int64 word_t; diff --git a/src/third_party/sparsehash/AUTHORS b/src/third_party/sparsehash/AUTHORS new file mode 100644 index 000000000..d8c24c64c --- /dev/null +++ b/src/third_party/sparsehash/AUTHORS @@ -0,0 +1,2 @@ +google-sparsehash@googlegroups.com + diff --git a/src/third_party/sparsehash/CMakeLists.txt b/src/third_party/sparsehash/CMakeLists.txt new file mode 100644 index 000000000..e604ec395 --- /dev/null +++ b/src/third_party/sparsehash/CMakeLists.txt @@ -0,0 +1,108 @@ +cmake_minimum_required(VERSION 2.6.4) + +include(CheckCXXSourceCompiles) +include(CheckFunctionExists) +include(CheckIncludeFile) +include(CheckTypeSize) + +include(TestCXXAcceptsFlag) + +set(HASH_NAME hash) + +set(SAVED_CMAKE_REQUIRED_QUIET ${CMAKE_REQUIRED_FLAGS_QUIET}) +set(SAVED_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + +set(CMAKE_REQUIRED_QUIET TRUE) + +check_cxx_accepts_flag("-std=c++11" HAVE_CXX11) +if(HAVE_CXX11) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11") +endif() + +foreach(LOCATION unordered_map;tr1/unordered_map) + if (HASH_NAMESPACE) + break() + endif() + foreach(NAMESPACE std;std::tr1) + unset(HAVE_UNORDERED_MAP CACHE) + check_cxx_source_compiles( + " + #include <${LOCATION}> + int main() { + ${NAMESPACE}::unordered_map m; + return m.find(42) == m.end(); + } + " + HAVE_UNORDERED_MAP) + if(${HAVE_UNORDERED_MAP}) + set(HASH_NAMESPACE ${NAMESPACE}) + endif() + endforeach(NAMESPACE) +endforeach(LOCATION) + +foreach(LOCATION hash_map;ext/hash_map) + if (HASH_NAMESPACE) + break() + endif() + foreach(NAMESPACE std;stdext) + if (HASH_NAMESPACE) + break() + endif() + unset(HAVE_HASH_MAP CACHE) + check_cxx_source_compiles( + " + #include <${LOCATION}> + int main() { + ${NAMESPACE}::unordered_map m; + return m.find(42) == m.end(); + } + " + HAVE_HASH_MAP) + if(${HAVE_HASH_MAP}) + set(HASH_NAMESPACE ${NAMESPACE}) + endif() + endforeach(NAMESPACE) +endforeach(LOCATION) + +if(NOT HASH_NAMESPACE) + message(FATAL_ERROR "Unable to determine the standard hash namespace") +endif() + +foreach(LOCATION functional;tr1/functional;ext/hash_fun.h;ext/stl_hash_fun.h;hash_fun.h;stl_hash_fun.h;stl/_hash_fun.h) + if (HASH_FUN_H) + break() + endif() + unset(HAVE_HASH_FUNC_HEADER CACHE) + check_cxx_source_compiles( + " + #include <${LOCATION}> + int main() { int x = ${HASH_NAMESPACE}::hash()(42); } + " + 
HAVE_HASH_FUNC_HEADER) + if (${HAVE_HASH_FUNC_HEADER}) + set(HASH_FUN_H ${LOCATION}) + endif() +endforeach(LOCATION) + +if(NOT HASH_FUN_H) + message(FATAL_ERROR "Unable to find standard hash header file") +endif() + +message(STATUS "Using hash header <${HASH_FUN_H}> and namespace \"${HASH_NAMESPACE}\"") + +set(CMAKE_REQUIRED_QUIET ${SAVED_CMAKE_REQUIRED_QUIET}) +set(CMAKE_REQUIRED_FLAGS ${SAVED_CMAKE_REQUIRED_FLAGS}) + +check_include_file(inttypes.h HAVE_INTTYPES_H) +check_include_file(stdint.h HAVE_STDINT_H) +check_include_file(sys/types.h HAVE_SYS_TYPES_H) + +check_function_exists(memcpy HAVE_MEMCPY) + +check_type_size("long long" LONG_LONG) +check_type_size("uint16_t" UINT16_T) +check_type_size("u_int16_t" U_INT16_T) +check_type_size("__uint16_t" __UINT16_T) + +configure_file("${PROJECT_SOURCE_DIR}/src/third_party/sparsehash/config.h.cmake" + "${PROJECT_SOURCE_DIR}/src/third_party/sparsehash/src/sparsehash/internal/sparseconfig.h") diff --git a/src/third_party/sparsehash/COPYING b/src/third_party/sparsehash/COPYING new file mode 100644 index 000000000..e4956cfd9 --- /dev/null +++ b/src/third_party/sparsehash/COPYING @@ -0,0 +1,28 @@ +Copyright (c) 2005, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/third_party/sparsehash/config.h.cmake b/src/third_party/sparsehash/config.h.cmake new file mode 100644 index 000000000..ecaf8bf80 --- /dev/null +++ b/src/third_party/sparsehash/config.h.cmake @@ -0,0 +1,45 @@ +#cmakedefine HASH_FUN_H <@HASH_FUN_H@> + +/* the namespace of the hash<> function */ +#cmakedefine HASH_NAMESPACE @HASH_NAMESPACE@ + +#cmakedefine HASH_NAME @HASH_NAME@ + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SYS_TYPES_H 1 + +/* Define to 1 if the system has the type `long long'. */ +#cmakedefine HAVE_LONG_LONG 1 + +/* Define to 1 if you have the `memcpy' function. */ +#cmakedefine HAVE_MEMCPY 1 + +/* Define to 1 if the system has the type `uint16_t'. 
*/ +#cmakedefine HAVE_UINT16_T 1 + +/* Define to 1 if the system has the type `u_int16_t'. */ +#cmakedefine HAVE_U_INT16_T 1 + +/* Define to 1 if the system has the type `__uint16'. */ +#cmakedefine HAVE___UINT16 1 + +/* The system-provided hash function including the namespace. */ +#define SPARSEHASH_HASH HASH_NAMESPACE::HASH_NAME + +/* The system-provided hash function, in namespace HASH_NAMESPACE. */ +#define SPARSEHASH_HASH_NO_NAMESPACE HASH_NAME + +/* Namespace for Google classes */ +#define GOOGLE_NAMESPACE ::sparsehash + +/* Stops putting the code inside the Google namespace */ +#define _END_GOOGLE_NAMESPACE_ } + +/* Puts following code inside the Google namespace */ +#define _START_GOOGLE_NAMESPACE_ namespace sparsehash { diff --git a/src/third_party/sparsehash/src/sparsehash/dense_hash_map b/src/third_party/sparsehash/src/sparsehash/dense_hash_map new file mode 100644 index 000000000..05fd580e6 --- /dev/null +++ b/src/third_party/sparsehash/src/sparsehash/dense_hash_map @@ -0,0 +1,369 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ---- +// +// This is just a very thin wrapper over densehashtable.h, just +// like sgi stl's stl_hash_map is a very thin wrapper over +// stl_hashtable. The major thing we define is operator[], because +// we have a concept of a data_type which stl_hashtable doesn't +// (it only has a key and a value). +// +// NOTE: this is exactly like sparse_hash_map.h, with the word +// "sparse" replaced by "dense", except for the addition of +// set_empty_key(). +// +// YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION. +// +// Otherwise your program will die in mysterious ways. (Note if you +// use the constructor that takes an InputIterator range, you pass in +// the empty key in the constructor, rather than after. As a result, +// this constructor differs from the standard STL version.) +// +// In other respects, we adhere mostly to the STL semantics for +// hash-map. 
One important exception is that insert() may invalidate +// iterators entirely -- STL semantics are that insert() may reorder +// iterators, but they all still refer to something valid in the +// hashtable. Not so for us. Likewise, insert() may invalidate +// pointers into the hashtable. (Whether insert invalidates iterators +// and pointers depends on whether it results in a hashtable resize). +// On the plus side, delete() doesn't invalidate iterators or pointers +// at all, or even change the ordering of elements. +// +// Here are a few "power user" tips: +// +// 1) set_deleted_key(): +// If you want to use erase() you *must* call set_deleted_key(), +// in addition to set_empty_key(), after construction. +// The deleted and empty keys must differ. +// +// 2) resize(0): +// When an item is deleted, its memory isn't freed right +// away. This allows you to iterate over a hashtable, +// and call erase(), without invalidating the iterator. +// To force the memory to be freed, call resize(0). +// For tr1 compatibility, this can also be called as rehash(0). +// +// 3) min_load_factor(0.0) +// Setting the minimum load factor to 0.0 guarantees that +// the hash table will never shrink. +// +// Roughly speaking: +// (1) dense_hash_map: fastest, uses the most memory unless entries are small +// (2) sparse_hash_map: slowest, uses the least memory +// (3) hash_map / unordered_map (STL): in the middle +// +// Typically I use sparse_hash_map when I care about space and/or when +// I need to save the hashtable on disk. I use hash_map otherwise. I +// don't personally use dense_hash_set ever; some people use it for +// small sets with lots of lookups. +// +// - dense_hash_map has, typically, about 78% memory overhead (if your +// data takes up X bytes, the hash_map uses .78X more bytes in overhead). +// - sparse_hash_map has about 4 bits overhead per entry. +// - sparse_hash_map can be 3-7 times slower than the others for lookup and, +// especially, inserts. See time_hash_map.cc for details. +// +// See /usr/(local/)?doc/sparsehash-*/dense_hash_map.html +// for information about how to use this class. + +#ifndef _DENSE_HASH_MAP_H_ +#define _DENSE_HASH_MAP_H_ + +#include +#include // needed by stl_alloc +#include // for equal_to<>, select1st<>, etc +#include // for alloc +#include // for pair<> +#include // IWYU pragma: export +#include +#include HASH_FUN_H // for hash<> +_START_GOOGLE_NAMESPACE_ + +template , // defined in sparseconfig.h + class EqualKey = std::equal_to, + class Alloc = libc_allocator_with_realloc > > +class dense_hash_map { + private: + // Apparently select1st is not stl-standard, so we define our own + struct SelectKey { + typedef const Key& result_type; + const Key& operator()(const std::pair& p) const { + return p.first; + } + }; + struct SetKey { + void operator()(std::pair* value, const Key& new_key) const { + *const_cast(&value->first) = new_key; + // It would be nice to clear the rest of value here as well, in + // case it's taking up a lot of memory. We do this by clearing + // the value. This assumes T has a zero-arg constructor! + value->second = T(); + } + }; + // For operator[]. 
+ struct DefaultValue { + std::pair operator()(const Key& key) { + return std::make_pair(key, T()); + } + }; + + // The actual data + typedef dense_hashtable, Key, HashFcn, SelectKey, + SetKey, EqualKey, Alloc> ht; + ht rep; + + public: + typedef typename ht::key_type key_type; + typedef T data_type; + typedef T mapped_type; + typedef typename ht::value_type value_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef Alloc allocator_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::pointer pointer; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::reference reference; + typedef typename ht::const_reference const_reference; + + typedef typename ht::iterator iterator; + typedef typename ht::const_iterator const_iterator; + typedef typename ht::local_iterator local_iterator; + typedef typename ht::const_local_iterator const_local_iterator; + + // Iterator functions + iterator begin() { return rep.begin(); } + iterator end() { return rep.end(); } + const_iterator begin() const { return rep.begin(); } + const_iterator end() const { return rep.end(); } + + + // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements. + local_iterator begin(size_type i) { return rep.begin(i); } + local_iterator end(size_type i) { return rep.end(i); } + const_local_iterator begin(size_type i) const { return rep.begin(i); } + const_local_iterator end(size_type i) const { return rep.end(i); } + + // Accessor functions + allocator_type get_allocator() const { return rep.get_allocator(); } + hasher hash_funct() const { return rep.hash_funct(); } + hasher hash_function() const { return hash_funct(); } + key_equal key_eq() const { return rep.key_eq(); } + + + // Constructors + explicit dense_hash_map(size_type expected_max_items_in_table = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), alloc) { + } + + template + dense_hash_map(InputIterator f, InputIterator l, + const key_type& empty_key_val, + size_type expected_max_items_in_table = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), alloc) { + set_empty_key(empty_key_val); + rep.insert(f, l); + } + // We use the default copy constructor + // We use the default operator=() + // We use the default destructor + + void clear() { rep.clear(); } + // This clears the hash map without resizing it down to the minimum + // bucket count, but rather keeps the number of buckets constant + void clear_no_resize() { rep.clear_no_resize(); } + void swap(dense_hash_map& hs) { rep.swap(hs.rep); } + + + // Functions concerning size + size_type size() const { return rep.size(); } + size_type max_size() const { return rep.max_size(); } + bool empty() const { return rep.empty(); } + size_type bucket_count() const { return rep.bucket_count(); } + size_type max_bucket_count() const { return rep.max_bucket_count(); } + + // These are tr1 methods. bucket() is the bucket the key is or would be in. 
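The sizing functions above, together with the tr1-style load-factor accessors that follow, give fine-grained control over the bucket array; a sketch combining them with clear_no_resize() (thresholds and sizes are illustrative):

    #include <sparsehash/dense_hash_map>
    #include <cstdio>

    int main() {
      sparsehash::dense_hash_map<int, int> m;
      m.set_empty_key(-1);
      m.max_load_factor(0.8f);  // grow once more than 80% of buckets are used
      m.min_load_factor(0.2f);  // shrink once fewer than 20% are used
      for (int i = 0; i < 1000; ++i) m[i] = i;
      std::printf("buckets while full: %lu\n", (unsigned long)m.bucket_count());
      m.clear_no_resize();      // drop all elements but keep the bucket array
      std::printf("buckets after:      %lu\n", (unsigned long)m.bucket_count());
      return 0;
    }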
+ size_type bucket_size(size_type i) const { return rep.bucket_size(i); } + size_type bucket(const key_type& key) const { return rep.bucket(key); } + float load_factor() const { + return size() * 1.0f / bucket_count(); + } + float max_load_factor() const { + float shrink, grow; + rep.get_resizing_parameters(&shrink, &grow); + return grow; + } + void max_load_factor(float new_grow) { + float shrink, grow; + rep.get_resizing_parameters(&shrink, &grow); + rep.set_resizing_parameters(shrink, new_grow); + } + // These aren't tr1 methods but perhaps ought to be. + float min_load_factor() const { + float shrink, grow; + rep.get_resizing_parameters(&shrink, &grow); + return shrink; + } + void min_load_factor(float new_shrink) { + float shrink, grow; + rep.get_resizing_parameters(&shrink, &grow); + rep.set_resizing_parameters(new_shrink, grow); + } + // Deprecated; use min_load_factor() or max_load_factor() instead. + void set_resizing_parameters(float shrink, float grow) { + rep.set_resizing_parameters(shrink, grow); + } + + void resize(size_type hint) { rep.resize(hint); } + void rehash(size_type hint) { resize(hint); } // the tr1 name + + // Lookup routines + iterator find(const key_type& key) { return rep.find(key); } + const_iterator find(const key_type& key) const { return rep.find(key); } + + data_type& operator[](const key_type& key) { // This is our value-add! + // If key is in the hashtable, returns find(key)->second, + // otherwise returns insert(value_type(key, T()).first->second. + // Note it does not create an empty T unless the find fails. + return rep.template find_or_insert(key).second; + } + + size_type count(const key_type& key) const { return rep.count(key); } + + std::pair equal_range(const key_type& key) { + return rep.equal_range(key); + } + std::pair equal_range(const key_type& key) + const { + return rep.equal_range(key); + } + + + // Insertion routines + std::pair insert(const value_type& obj) { + return rep.insert(obj); + } + template void insert(InputIterator f, InputIterator l) { + rep.insert(f, l); + } + void insert(const_iterator f, const_iterator l) { + rep.insert(f, l); + } + // Required for std::insert_iterator; the passed-in iterator is ignored. + iterator insert(iterator, const value_type& obj) { + return insert(obj).first; + } + + // Deletion and empty routines + // THESE ARE NON-STANDARD! I make you specify an "impossible" key + // value to identify deleted and empty buckets. You can change the + // deleted key as time goes on, or get rid of it entirely to be insert-only. + void set_empty_key(const key_type& key) { // YOU MUST CALL THIS! + rep.set_empty_key(value_type(key, data_type())); // rep wants a value + } + key_type empty_key() const { + return rep.empty_key().first; // rep returns a value + } + + void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } + void clear_deleted_key() { rep.clear_deleted_key(); } + key_type deleted_key() const { return rep.deleted_key(); } + + // These are standard + size_type erase(const key_type& key) { return rep.erase(key); } + void erase(iterator it) { rep.erase(it); } + void erase(iterator f, iterator l) { rep.erase(f, l); } + + + // Comparison + bool operator==(const dense_hash_map& hs) const { return rep == hs.rep; } + bool operator!=(const dense_hash_map& hs) const { return rep != hs.rep; } + + + // I/O -- this is an add-on for writing hash map to disk + // + // For maximum flexibility, this does not assume a particular + // file type (though it will probably be a FILE *). 
We just pass + // the fp through to rep. + + // If your keys and values are simple enough, you can pass this + // serializer to serialize()/unserialize(). "Simple enough" means + // value_type is a POD type that contains no pointers. Note, + // however, we don't try to normalize endianness. + typedef typename ht::NopointerSerializer NopointerSerializer; + + // serializer: a class providing operator()(OUTPUT*, const value_type&) + // (writing value_type to OUTPUT). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a + // pointer to a class providing size_t Write(const void*, size_t), + // which writes a buffer into a stream (which fp presumably + // owns) and returns the number of bytes successfully written. + // Note basic_ostream is not currently supported. + template + bool serialize(ValueSerializer serializer, OUTPUT* fp) { + return rep.serialize(serializer, fp); + } + + // serializer: a functor providing operator()(INPUT*, value_type*) + // (reading from INPUT and into value_type). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a + // pointer to a class providing size_t Read(void*, size_t), + // which reads into a buffer from a stream (which fp presumably + // owns) and returns the number of bytes successfully read. + // Note basic_istream is not currently supported. + // NOTE: Since value_type is std::pair, ValueSerializer + // may need to do a const cast in order to fill in the key. + template + bool unserialize(ValueSerializer serializer, INPUT* fp) { + return rep.unserialize(serializer, fp); + } +}; + +// We need a global swap as well +template +inline void swap(dense_hash_map& hm1, + dense_hash_map& hm2) { + hm1.swap(hm2); +} + +_END_GOOGLE_NAMESPACE_ + +#endif /* _DENSE_HASH_MAP_H_ */ diff --git a/src/third_party/sparsehash/src/sparsehash/dense_hash_set b/src/third_party/sparsehash/src/sparsehash/dense_hash_set new file mode 100644 index 000000000..050b15d1d --- /dev/null +++ b/src/third_party/sparsehash/src/sparsehash/dense_hash_set @@ -0,0 +1,338 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// +// This is just a very thin wrapper over densehashtable.h, just +// like sgi stl's stl_hash_set is a very thin wrapper over +// stl_hashtable. The major thing we define is operator[], because +// we have a concept of a data_type which stl_hashtable doesn't +// (it only has a key and a value). +// +// This is more different from dense_hash_map than you might think, +// because all iterators for sets are const (you obviously can't +// change the key, and for sets there is no value). +// +// NOTE: this is exactly like sparse_hash_set.h, with the word +// "sparse" replaced by "dense", except for the addition of +// set_empty_key(). +// +// YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION. +// +// Otherwise your program will die in mysterious ways. (Note if you +// use the constructor that takes an InputIterator range, you pass in +// the empty key in the constructor, rather than after. As a result, +// this constructor differs from the standard STL version.) +// +// In other respects, we adhere mostly to the STL semantics for +// hash-map. One important exception is that insert() may invalidate +// iterators entirely -- STL semantics are that insert() may reorder +// iterators, but they all still refer to something valid in the +// hashtable. Not so for us. Likewise, insert() may invalidate +// pointers into the hashtable. (Whether insert invalidates iterators +// and pointers depends on whether it results in a hashtable resize). +// On the plus side, delete() doesn't invalidate iterators or pointers +// at all, or even change the ordering of elements. +// +// Here are a few "power user" tips: +// +// 1) set_deleted_key(): +// If you want to use erase() you must call set_deleted_key(), +// in addition to set_empty_key(), after construction. +// The deleted and empty keys must differ. +// +// 2) resize(0): +// When an item is deleted, its memory isn't freed right +// away. This allows you to iterate over a hashtable, +// and call erase(), without invalidating the iterator. +// To force the memory to be freed, call resize(0). +// For tr1 compatibility, this can also be called as rehash(0). +// +// 3) min_load_factor(0.0) +// Setting the minimum load factor to 0.0 guarantees that +// the hash table will never shrink. +// +// Roughly speaking: +// (1) dense_hash_set: fastest, uses the most memory unless entries are small +// (2) sparse_hash_set: slowest, uses the least memory +// (3) hash_set / unordered_set (STL): in the middle +// +// Typically I use sparse_hash_set when I care about space and/or when +// I need to save the hashtable on disk. I use hash_set otherwise. I +// don't personally use dense_hash_set ever; some people use it for +// small sets with lots of lookups. +// +// - dense_hash_set has, typically, about 78% memory overhead (if your +// data takes up X bytes, the hash_set uses .78X more bytes in overhead). +// - sparse_hash_set has about 4 bits overhead per entry. 
+// - sparse_hash_set can be 3-7 times slower than the others for lookup and, +// especially, inserts. See time_hash_map.cc for details. +// +// See /usr/(local/)?doc/sparsehash-*/dense_hash_set.html +// for information about how to use this class. + +#ifndef _DENSE_HASH_SET_H_ +#define _DENSE_HASH_SET_H_ + +#include +#include // needed by stl_alloc +#include // for equal_to<>, select1st<>, etc +#include // for alloc +#include // for pair<> +#include // IWYU pragma: export +#include +#include HASH_FUN_H // for hash<> +_START_GOOGLE_NAMESPACE_ + +template , // defined in sparseconfig.h + class EqualKey = std::equal_to, + class Alloc = libc_allocator_with_realloc > +class dense_hash_set { + private: + // Apparently identity is not stl-standard, so we define our own + struct Identity { + typedef const Value& result_type; + const Value& operator()(const Value& v) const { return v; } + }; + struct SetKey { + void operator()(Value* value, const Value& new_key) const { + *value = new_key; + } + }; + + // The actual data + typedef dense_hashtable ht; + ht rep; + + public: + typedef typename ht::key_type key_type; + typedef typename ht::value_type value_type; + typedef typename ht::hasher hasher; + typedef typename ht::key_equal key_equal; + typedef Alloc allocator_type; + + typedef typename ht::size_type size_type; + typedef typename ht::difference_type difference_type; + typedef typename ht::const_pointer pointer; + typedef typename ht::const_pointer const_pointer; + typedef typename ht::const_reference reference; + typedef typename ht::const_reference const_reference; + + typedef typename ht::const_iterator iterator; + typedef typename ht::const_iterator const_iterator; + typedef typename ht::const_local_iterator local_iterator; + typedef typename ht::const_local_iterator const_local_iterator; + + + // Iterator functions -- recall all iterators are const + iterator begin() const { return rep.begin(); } + iterator end() const { return rep.end(); } + + // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements. 
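The set wrapper follows the same empty/deleted-key contract as the map; a minimal sketch (key values are illustrative):

    #include <sparsehash/dense_hash_set>
    #include <cstdio>

    int main() {
      sparsehash::dense_hash_set<int> s;
      s.set_empty_key(-1);    // required immediately after construction
      s.set_deleted_key(-2);  // required before erase(); must differ from the empty key
      s.insert(42);
      s.insert(99);
      s.erase(42);
      std::printf("contains 99: %d\n", s.find(99) != s.end() ? 1 : 0);
      return 0;
    }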
+ local_iterator begin(size_type i) const { return rep.begin(i); } + local_iterator end(size_type i) const { return rep.end(i); } + + + // Accessor functions + allocator_type get_allocator() const { return rep.get_allocator(); } + hasher hash_funct() const { return rep.hash_funct(); } + hasher hash_function() const { return hash_funct(); } // tr1 name + key_equal key_eq() const { return rep.key_eq(); } + + + // Constructors + explicit dense_hash_set(size_type expected_max_items_in_table = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { + } + + template + dense_hash_set(InputIterator f, InputIterator l, + const key_type& empty_key_val, + size_type expected_max_items_in_table = 0, + const hasher& hf = hasher(), + const key_equal& eql = key_equal(), + const allocator_type& alloc = allocator_type()) + : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) { + set_empty_key(empty_key_val); + rep.insert(f, l); + } + // We use the default copy constructor + // We use the default operator=() + // We use the default destructor + + void clear() { rep.clear(); } + // This clears the hash set without resizing it down to the minimum + // bucket count, but rather keeps the number of buckets constant + void clear_no_resize() { rep.clear_no_resize(); } + void swap(dense_hash_set& hs) { rep.swap(hs.rep); } + + + // Functions concerning size + size_type size() const { return rep.size(); } + size_type max_size() const { return rep.max_size(); } + bool empty() const { return rep.empty(); } + size_type bucket_count() const { return rep.bucket_count(); } + size_type max_bucket_count() const { return rep.max_bucket_count(); } + + // These are tr1 methods. bucket() is the bucket the key is or would be in. + size_type bucket_size(size_type i) const { return rep.bucket_size(i); } + size_type bucket(const key_type& key) const { return rep.bucket(key); } + float load_factor() const { + return size() * 1.0f / bucket_count(); + } + float max_load_factor() const { + float shrink, grow; + rep.get_resizing_parameters(&shrink, &grow); + return grow; + } + void max_load_factor(float new_grow) { + float shrink, grow; + rep.get_resizing_parameters(&shrink, &grow); + rep.set_resizing_parameters(shrink, new_grow); + } + // These aren't tr1 methods but perhaps ought to be. + float min_load_factor() const { + float shrink, grow; + rep.get_resizing_parameters(&shrink, &grow); + return shrink; + } + void min_load_factor(float new_shrink) { + float shrink, grow; + rep.get_resizing_parameters(&shrink, &grow); + rep.set_resizing_parameters(new_shrink, grow); + } + // Deprecated; use min_load_factor() or max_load_factor() instead. 
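Stepping back to construction: as the header comment notes, the iterator-range constructor above is the one place where the empty key is supplied up front, because elements are inserted during construction, before set_empty_key() could otherwise be called. A sketch (the key choice is illustrative):

    #include <sparsehash/dense_hash_set>

    int main() {
      const int values[] = { 1, 2, 3, 5, 8 };
      // -1 is the empty key; it must never appear in the input range.
      sparsehash::dense_hash_set<int> s(values, values + 5, -1);
      return (int)s.size() - 5;  // 0 on success
    }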
+ void set_resizing_parameters(float shrink, float grow) { + rep.set_resizing_parameters(shrink, grow); + } + + void resize(size_type hint) { rep.resize(hint); } + void rehash(size_type hint) { resize(hint); } // the tr1 name + + // Lookup routines + iterator find(const key_type& key) const { return rep.find(key); } + + size_type count(const key_type& key) const { return rep.count(key); } + + std::pair equal_range(const key_type& key) const { + return rep.equal_range(key); + } + + + // Insertion routines + std::pair insert(const value_type& obj) { + std::pair p = rep.insert(obj); + return std::pair(p.first, p.second); // const to non-const + } + template void insert(InputIterator f, InputIterator l) { + rep.insert(f, l); + } + void insert(const_iterator f, const_iterator l) { + rep.insert(f, l); + } + // Required for std::insert_iterator; the passed-in iterator is ignored. + iterator insert(iterator, const value_type& obj) { + return insert(obj).first; + } + + // Deletion and empty routines + // THESE ARE NON-STANDARD! I make you specify an "impossible" key + // value to identify deleted and empty buckets. You can change the + // deleted key as time goes on, or get rid of it entirely to be insert-only. + void set_empty_key(const key_type& key) { rep.set_empty_key(key); } + key_type empty_key() const { return rep.empty_key(); } + + void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); } + void clear_deleted_key() { rep.clear_deleted_key(); } + key_type deleted_key() const { return rep.deleted_key(); } + + // These are standard + size_type erase(const key_type& key) { return rep.erase(key); } + void erase(iterator it) { rep.erase(it); } + void erase(iterator f, iterator l) { rep.erase(f, l); } + + + // Comparison + bool operator==(const dense_hash_set& hs) const { return rep == hs.rep; } + bool operator!=(const dense_hash_set& hs) const { return rep != hs.rep; } + + + // I/O -- this is an add-on for writing metainformation to disk + // + // For maximum flexibility, this does not assume a particular + // file type (though it will probably be a FILE *). We just pass + // the fp through to rep. + + // If your keys and values are simple enough, you can pass this + // serializer to serialize()/unserialize(). "Simple enough" means + // value_type is a POD type that contains no pointers. Note, + // however, we don't try to normalize endianness. + typedef typename ht::NopointerSerializer NopointerSerializer; + + // serializer: a class providing operator()(OUTPUT*, const value_type&) + // (writing value_type to OUTPUT). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a + // pointer to a class providing size_t Write(const void*, size_t), + // which writes a buffer into a stream (which fp presumably + // owns) and returns the number of bytes successfully written. + // Note basic_ostream is not currently supported. + template + bool serialize(ValueSerializer serializer, OUTPUT* fp) { + return rep.serialize(serializer, fp); + } + + // serializer: a functor providing operator()(INPUT*, value_type*) + // (reading from INPUT and into value_type). You can specify a + // NopointerSerializer object if appropriate (see above). + // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a + // pointer to a class providing size_t Read(void*, size_t), + // which reads into a buffer from a stream (which fp presumably + // owns) and returns the number of bytes successfully read. 
+ // Note basic_istream is not currently supported. + template + bool unserialize(ValueSerializer serializer, INPUT* fp) { + return rep.unserialize(serializer, fp); + } +}; + +template +inline void swap(dense_hash_set& hs1, + dense_hash_set& hs2) { + hs1.swap(hs2); +} + +_END_GOOGLE_NAMESPACE_ + +#endif /* _DENSE_HASH_SET_H_ */ diff --git a/src/third_party/sparsehash/src/sparsehash/internal/densehashtable.h b/src/third_party/sparsehash/src/sparsehash/internal/densehashtable.h new file mode 100644 index 000000000..64822bc49 --- /dev/null +++ b/src/third_party/sparsehash/src/sparsehash/internal/densehashtable.h @@ -0,0 +1,1325 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// +// A dense hashtable is a particular implementation of +// a hashtable: one that is meant to minimize memory allocation. +// It does this by using an array to store all the data. We +// steal a value from the key space to indicate "empty" array +// elements (ie indices where no item lives) and another to indicate +// "deleted" elements. +// +// (Note it is possible to change the value of the delete key +// on the fly; you can even remove it, though after that point +// the hashtable is insert_only until you set it again. The empty +// value however can't be changed.) +// +// To minimize allocation and pointer overhead, we use internal +// probing, in which the hashtable is a single table, and collisions +// are resolved by trying to insert again in another bucket. The +// most cache-efficient internal probing schemes are linear probing +// (which suffers, alas, from clumping) and quadratic probing, which +// is what we implement by default. +// +// Type requirements: value_type is required to be Copy Constructible +// and Default Constructible. It is not required to be (and commonly +// isn't) Assignable. +// +// You probably shouldn't use this code directly. Use dense_hash_map<> +// or dense_hash_set<> instead. 
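Through those wrappers, the "stolen" empty and deleted values become reserved sentinel keys the caller chooses; with a user-defined key type this also requires a hasher and an equality functor. A sketch under assumed names (Point, PointHash, and PointEq are illustrative, not part of this patch):

    #include <sparsehash/dense_hash_map>
    #include <cstddef>

    struct Point { int x, y; };

    struct PointHash {
      std::size_t operator()(const Point& p) const {
        return static_cast<std::size_t>(p.x) * 1000003u
             ^ static_cast<std::size_t>(p.y);
      }
    };

    struct PointEq {
      bool operator()(const Point& a, const Point& b) const {
        return a.x == b.x && a.y == b.y;
      }
    };

    int main() {
      sparsehash::dense_hash_map<Point, int, PointHash, PointEq> m;
      Point empty = { -1, -1 };    // a point that never occurs in real data
      Point deleted = { -1, -2 };  // distinct from the empty sentinel
      m.set_empty_key(empty);
      m.set_deleted_key(deleted);
      Point p = { 3, 4 };
      m[p] = 25;
      return 0;
    }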
+ +// You can change the following below: +// HT_OCCUPANCY_PCT -- how full before we double size +// HT_EMPTY_PCT -- how empty before we halve size +// HT_MIN_BUCKETS -- default smallest bucket size +// +// You can also change enlarge_factor (which defaults to +// HT_OCCUPANCY_PCT), and shrink_factor (which defaults to +// HT_EMPTY_PCT) with set_resizing_parameters(). +// +// How to decide what values to use? +// shrink_factor's default of .4 * OCCUPANCY_PCT, is probably good. +// HT_MIN_BUCKETS is probably unnecessary since you can specify +// (indirectly) the starting number of buckets at construct-time. +// For enlarge_factor, you can use this chart to try to trade-off +// expected lookup time to the space taken up. By default, this +// code uses quadratic probing, though you can change it to linear +// via JUMP_ below if you really want to. +// +// From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html +// NUMBER OF PROBES / LOOKUP Successful Unsuccessful +// Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L) +// Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)2]/2 +// +// -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99 +// QUADRATIC COLLISION RES. +// probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11 +// probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6 +// LINEAR COLLISION RES. +// probes/successful lookup 1.06 1.5 1.75 2.5 3.0 5.5 50.5 +// probes/unsuccessful lookup 1.12 2.5 3.6 8.5 13.0 50.0 5000.0 + +#ifndef _DENSEHASHTABLE_H_ +#define _DENSEHASHTABLE_H_ + +#include +#include +#include // for FILE, fwrite, fread +#include // For swap(), eg +#include // For iterator tags +#include // for numeric_limits +#include // For uninitialized_fill +#include // for pair +#include +#include +#include +#include // For length_error + +_START_GOOGLE_NAMESPACE_ + +namespace base { // just to make google->opensource transition easier +using GOOGLE_NAMESPACE::true_type; +using GOOGLE_NAMESPACE::false_type; +using GOOGLE_NAMESPACE::integral_constant; +using GOOGLE_NAMESPACE::is_same; +using GOOGLE_NAMESPACE::remove_const; +} + +// The probing method +// Linear probing +// #define JUMP_(key, num_probes) ( 1 ) +// Quadratic probing +#define JUMP_(key, num_probes) ( num_probes ) + +// Hashtable class, used to implement the hashed associative containers +// hash_set and hash_map. + +// Value: what is stored in the table (each bucket is a Value). +// Key: something in a 1-to-1 correspondence to a Value, that can be used +// to search for a Value in the table (find() takes a Key). +// HashFcn: Takes a Key and returns an integer, the more unique the better. +// ExtractKey: given a Value, returns the unique Key associated with it. +// Must inherit from unary_function, or at least have a +// result_type enum indicating the return type of operator(). +// SetKey: given a Value* and a Key, modifies the value such that +// ExtractKey(value) == key. We guarantee this is only called +// with key == deleted_key or key == empty_key. +// EqualKey: Given two Keys, says whether they are the same (that is, +// if they are both associated with the same Value). +// Alloc: STL allocator to use to allocate memory. 
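For reference, the probe counts in the chart above follow from the standard open-addressing formulas as functions of the load factor L; a small standalone check of the quadratic column (this reproduces the cited numbers and is not part of the library):

    #include <cmath>
    #include <cstdio>

    int main() {
      const double loads[] = { 0.10, 0.50, 0.75, 0.90, 0.99 };
      for (int i = 0; i < 5; ++i) {
        const double L = loads[i];
        const double hit  = 1.0 - std::log(1.0 - L) - L / 2.0;       // successful lookup
        const double miss = 1.0 / (1.0 - L) - L - std::log(1.0 - L); // unsuccessful lookup
        std::printf("L=%.2f  probes/hit=%.2f  probes/miss=%.2f\n", L, hit, miss);
      }
      return 0;
    }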
+ +template +class dense_hashtable; + +template +struct dense_hashtable_iterator; + +template +struct dense_hashtable_const_iterator; + +// We're just an array, but we need to skip over empty and deleted elements +template +struct dense_hashtable_iterator { + private: + typedef typename A::template rebind::other value_alloc_type; + + public: + typedef dense_hashtable_iterator iterator; + typedef dense_hashtable_const_iterator const_iterator; + + typedef std::forward_iterator_tag iterator_category; // very little defined! + typedef V value_type; + typedef typename value_alloc_type::difference_type difference_type; + typedef typename value_alloc_type::size_type size_type; + typedef typename value_alloc_type::reference reference; + typedef typename value_alloc_type::pointer pointer; + + // "Real" constructor and default constructor + dense_hashtable_iterator(const dense_hashtable *h, + pointer it, pointer it_end, bool advance) + : ht(h), pos(it), end(it_end) { + if (advance) advance_past_empty_and_deleted(); + } + dense_hashtable_iterator() { } + // The default destructor is fine; we don't define one + // The default operator= is fine; we don't define one + + // Happy dereferencer + reference operator*() const { return *pos; } + pointer operator->() const { return &(operator*()); } + + // Arithmetic. The only hard part is making sure that + // we're not on an empty or marked-deleted array element + void advance_past_empty_and_deleted() { + while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) ) + ++pos; + } + iterator& operator++() { + assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this; + } + iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; } + + // Comparison. + bool operator==(const iterator& it) const { return pos == it.pos; } + bool operator!=(const iterator& it) const { return pos != it.pos; } + + + // The actual data + const dense_hashtable *ht; + pointer pos, end; +}; + + +// Now do it all again, but with const-ness! +template +struct dense_hashtable_const_iterator { + private: + typedef typename A::template rebind::other value_alloc_type; + + public: + typedef dense_hashtable_iterator iterator; + typedef dense_hashtable_const_iterator const_iterator; + + typedef std::forward_iterator_tag iterator_category; // very little defined! + typedef V value_type; + typedef typename value_alloc_type::difference_type difference_type; + typedef typename value_alloc_type::size_type size_type; + typedef typename value_alloc_type::const_reference reference; + typedef typename value_alloc_type::const_pointer pointer; + + // "Real" constructor and default constructor + dense_hashtable_const_iterator( + const dense_hashtable *h, + pointer it, pointer it_end, bool advance) + : ht(h), pos(it), end(it_end) { + if (advance) advance_past_empty_and_deleted(); + } + dense_hashtable_const_iterator() + : ht(NULL), pos(pointer()), end(pointer()) { } + // This lets us convert regular iterators to const iterators + dense_hashtable_const_iterator(const iterator &it) + : ht(it.ht), pos(it.pos), end(it.end) { } + // The default destructor is fine; we don't define one + // The default operator= is fine; we don't define one + + // Happy dereferencer + reference operator*() const { return *pos; } + pointer operator->() const { return &(operator*()); } + + // Arithmetic. 
The only hard part is making sure that + // we're not on an empty or marked-deleted array element + void advance_past_empty_and_deleted() { + while ( pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)) ) + ++pos; + } + const_iterator& operator++() { + assert(pos != end); ++pos; advance_past_empty_and_deleted(); return *this; + } + const_iterator operator++(int) { const_iterator tmp(*this); ++*this; return tmp; } + + // Comparison. + bool operator==(const const_iterator& it) const { return pos == it.pos; } + bool operator!=(const const_iterator& it) const { return pos != it.pos; } + + + // The actual data + const dense_hashtable *ht; + pointer pos, end; +}; + +template +class dense_hashtable { + private: + typedef typename Alloc::template rebind::other value_alloc_type; + + public: + typedef Key key_type; + typedef Value value_type; + typedef HashFcn hasher; + typedef EqualKey key_equal; + typedef Alloc allocator_type; + + typedef typename value_alloc_type::size_type size_type; + typedef typename value_alloc_type::difference_type difference_type; + typedef typename value_alloc_type::reference reference; + typedef typename value_alloc_type::const_reference const_reference; + typedef typename value_alloc_type::pointer pointer; + typedef typename value_alloc_type::const_pointer const_pointer; + typedef dense_hashtable_iterator + iterator; + + typedef dense_hashtable_const_iterator + const_iterator; + + // These come from tr1. For us they're the same as regular iterators. + typedef iterator local_iterator; + typedef const_iterator const_local_iterator; + + // How full we let the table get before we resize, by default. + // Knuth says .8 is good -- higher causes us to probe too much, + // though it saves memory. + static const int HT_OCCUPANCY_PCT; // defined at the bottom of this file + + // How empty we let the table get before we resize lower, by default. + // (0.0 means never resize lower.) + // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing + static const int HT_EMPTY_PCT; // defined at the bottom of this file + + // Minimum size we're willing to let hashtables be. + // Must be a power of two, and at least 4. + // Note, however, that for a given hashtable, the initial size is a + // function of the first constructor arg, and may be >HT_MIN_BUCKETS. + static const size_type HT_MIN_BUCKETS = 4; + + // By default, if you don't specify a hashtable size at + // construction-time, we use this size. Must be a power of two, and + // at least HT_MIN_BUCKETS. + static const size_type HT_DEFAULT_STARTING_BUCKETS = 32; + + // ITERATOR FUNCTIONS + iterator begin() { return iterator(this, table, + table + num_buckets, true); } + iterator end() { return iterator(this, table + num_buckets, + table + num_buckets, true); } + const_iterator begin() const { return const_iterator(this, table, + table+num_buckets,true);} + const_iterator end() const { return const_iterator(this, table + num_buckets, + table+num_buckets,true);} + + // These come from tr1 unordered_map. They iterate over 'bucket' n. + // We'll just consider bucket n to be the n-th element of the table. 
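Because the table is one flat array, the begin()/end() iterators above simply walk it, skipping empty and deleted slots via advance_past_empty_and_deleted(); through the map wrapper this looks like the following (contents are illustrative):

    #include <sparsehash/dense_hash_map>
    #include <cstdio>

    int main() {
      sparsehash::dense_hash_map<int, int> m;
      m.set_empty_key(-1);
      for (int i = 0; i < 4; ++i) m[i] = i * i;
      // Only live entries are visited; empty and deleted buckets are skipped.
      for (sparsehash::dense_hash_map<int, int>::iterator it = m.begin();
           it != m.end(); ++it) {
        std::printf("%d -> %d\n", it->first, it->second);
      }
      return 0;
    }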
+ local_iterator begin(size_type i) { + return local_iterator(this, table + i, table + i+1, false); + } + local_iterator end(size_type i) { + local_iterator it = begin(i); + if (!test_empty(i) && !test_deleted(i)) + ++it; + return it; + } + const_local_iterator begin(size_type i) const { + return const_local_iterator(this, table + i, table + i+1, false); + } + const_local_iterator end(size_type i) const { + const_local_iterator it = begin(i); + if (!test_empty(i) && !test_deleted(i)) + ++it; + return it; + } + + // ACCESSOR FUNCTIONS for the things we templatize on, basically + hasher hash_funct() const { return settings; } + key_equal key_eq() const { return key_info; } + allocator_type get_allocator() const { + return allocator_type(val_info); + } + + // Accessor function for statistics gathering. + int num_table_copies() const { return settings.num_ht_copies(); } + + private: + // Annoyingly, we can't copy values around, because they might have + // const components (they're probably pair). We use + // explicit destructor invocation and placement new to get around + // this. Arg. + void set_value(pointer dst, const_reference src) { + dst->~value_type(); // delete the old value, if any + new(dst) value_type(src); + } + + void destroy_buckets(size_type first, size_type last) { + for ( ; first != last; ++first) + table[first].~value_type(); + } + + // DELETE HELPER FUNCTIONS + // This lets the user describe a key that will indicate deleted + // table entries. This key should be an "impossible" entry -- + // if you try to insert it for real, you won't be able to retrieve it! + // (NB: while you pass in an entire value, only the key part is looked + // at. This is just because I don't know how to assign just a key.) + private: + void squash_deleted() { // gets rid of any deleted entries we have + if ( num_deleted ) { // get rid of deleted before writing + dense_hashtable tmp(*this); // copying will get rid of deleted + swap(tmp); // now we are tmp + } + assert(num_deleted == 0); + } + + // Test if the given key is the deleted indicator. Requires + // num_deleted > 0, for correctness of read(), and because that + // guarantees that key_info.delkey is valid. + bool test_deleted_key(const key_type& key) const { + assert(num_deleted > 0); + return equals(key_info.delkey, key); + } + + public: + void set_deleted_key(const key_type &key) { + // the empty indicator (if specified) and the deleted indicator + // must be different + assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval))) + && "Passed the empty-key to set_deleted_key"); + // It's only safe to change what "deleted" means if we purge deleted guys + squash_deleted(); + settings.set_use_deleted(true); + key_info.delkey = key; + } + void clear_deleted_key() { + squash_deleted(); + settings.set_use_deleted(false); + } + key_type deleted_key() const { + assert(settings.use_deleted() + && "Must set deleted key before calling deleted_key"); + return key_info.delkey; + } + + // These are public so the iterators can use them + // True if the item at position bucknum is "deleted" marker + bool test_deleted(size_type bucknum) const { + // Invariant: !use_deleted() implies num_deleted is 0. + assert(settings.use_deleted() || num_deleted == 0); + return num_deleted > 0 && test_deleted_key(get_key(table[bucknum])); + } + bool test_deleted(const iterator &it) const { + // Invariant: !use_deleted() implies num_deleted is 0. 
+ assert(settings.use_deleted() || num_deleted == 0); + return num_deleted > 0 && test_deleted_key(get_key(*it)); + } + bool test_deleted(const const_iterator &it) const { + // Invariant: !use_deleted() implies num_deleted is 0. + assert(settings.use_deleted() || num_deleted == 0); + return num_deleted > 0 && test_deleted_key(get_key(*it)); + } + + private: + void check_use_deleted(const char* caller) { + (void)caller; // could log it if the assert failed + assert(settings.use_deleted()); + } + + // Set it so test_deleted is true. true if object didn't used to be deleted. + bool set_deleted(iterator &it) { + check_use_deleted("set_deleted()"); + bool retval = !test_deleted(it); + // &* converts from iterator to value-type. + set_key(&(*it), key_info.delkey); + return retval; + } + // Set it so test_deleted is false. true if object used to be deleted. + bool clear_deleted(iterator &it) { + check_use_deleted("clear_deleted()"); + // Happens automatically when we assign something else in its place. + return test_deleted(it); + } + + // We also allow to set/clear the deleted bit on a const iterator. + // We allow a const_iterator for the same reason you can delete a + // const pointer: it's convenient, and semantically you can't use + // 'it' after it's been deleted anyway, so its const-ness doesn't + // really matter. + bool set_deleted(const_iterator &it) { + check_use_deleted("set_deleted()"); + bool retval = !test_deleted(it); + set_key(const_cast(&(*it)), key_info.delkey); + return retval; + } + // Set it so test_deleted is false. true if object used to be deleted. + bool clear_deleted(const_iterator &it) { + check_use_deleted("clear_deleted()"); + return test_deleted(it); + } + + // EMPTY HELPER FUNCTIONS + // This lets the user describe a key that will indicate empty (unused) + // table entries. This key should be an "impossible" entry -- + // if you try to insert it for real, you won't be able to retrieve it! + // (NB: while you pass in an entire value, only the key part is looked + // at. This is just because I don't know how to assign just a key.) + public: + // These are public so the iterators can use them + // True if the item at position bucknum is "empty" marker + bool test_empty(size_type bucknum) const { + assert(settings.use_empty()); // we always need to know what's empty! + return equals(get_key(val_info.emptyval), get_key(table[bucknum])); + } + bool test_empty(const iterator &it) const { + assert(settings.use_empty()); // we always need to know what's empty! + return equals(get_key(val_info.emptyval), get_key(*it)); + } + bool test_empty(const const_iterator &it) const { + assert(settings.use_empty()); // we always need to know what's empty! + return equals(get_key(val_info.emptyval), get_key(*it)); + } + + private: + void fill_range_with_empty(pointer table_start, pointer table_end) { + std::uninitialized_fill(table_start, table_end, val_info.emptyval); + } + + public: + // TODO(csilvers): change all callers of this to pass in a key instead, + // and take a const key_type instead of const value_type. + void set_empty_key(const_reference val) { + // Once you set the empty key, you can't change it + assert(!settings.use_empty() && "Calling set_empty_key multiple times"); + // The deleted indicator (if specified) and the empty indicator + // must be different. 
+ assert((!settings.use_deleted() || !equals(get_key(val), key_info.delkey)) + && "Setting the empty key the same as the deleted key"); + settings.set_use_empty(true); + set_value(&val_info.emptyval, val); + + assert(!table); // must set before first use + // num_buckets was set in constructor even though table was NULL + table = val_info.allocate(num_buckets); + assert(table); + fill_range_with_empty(table, table + num_buckets); + } + // TODO(user): return a key_type rather than a value_type + value_type empty_key() const { + assert(settings.use_empty()); + return val_info.emptyval; + } + + // FUNCTIONS CONCERNING SIZE + public: + size_type size() const { return num_elements - num_deleted; } + size_type max_size() const { return val_info.max_size(); } + bool empty() const { return size() == 0; } + size_type bucket_count() const { return num_buckets; } + size_type max_bucket_count() const { return max_size(); } + size_type nonempty_bucket_count() const { return num_elements; } + // These are tr1 methods. Their idea of 'bucket' doesn't map well to + // what we do. We just say every bucket has 0 or 1 items in it. + size_type bucket_size(size_type i) const { + return begin(i) == end(i) ? 0 : 1; + } + + private: + // Because of the above, size_type(-1) is never legal; use it for errors + static const size_type ILLEGAL_BUCKET = size_type(-1); + + // Used after a string of deletes. Returns true if we actually shrunk. + // TODO(csilvers): take a delta so we can take into account inserts + // done after shrinking. Maybe make part of the Settings class? + bool maybe_shrink() { + assert(num_elements >= num_deleted); + assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two + assert(bucket_count() >= HT_MIN_BUCKETS); + bool retval = false; + + // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS, + // we'll never shrink until you get relatively big, and we'll never + // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something + // like "dense_hash_set x; x.insert(4); x.erase(4);" will + // shrink us down to HT_MIN_BUCKETS buckets, which is too small. + const size_type num_remain = num_elements - num_deleted; + const size_type shrink_threshold = settings.shrink_threshold(); + if (shrink_threshold > 0 && num_remain < shrink_threshold && + bucket_count() > HT_DEFAULT_STARTING_BUCKETS) { + const float shrink_factor = settings.shrink_factor(); + size_type sz = bucket_count() / 2; // find how much we should shrink + while (sz > HT_DEFAULT_STARTING_BUCKETS && + num_remain < sz * shrink_factor) { + sz /= 2; // stay a power of 2 + } + dense_hashtable tmp(*this, sz); // Do the actual resizing + swap(tmp); // now we are tmp + retval = true; + } + settings.set_consider_shrink(false); // because we just considered it + return retval; + } + + // We'll let you resize a hashtable -- though this makes us copy all! + // When you resize, you say, "make it big enough for this many more elements" + // Returns true if we actually resized, false if size was already ok. 
+ bool resize_delta(size_type delta) { + bool did_resize = false; + if ( settings.consider_shrink() ) { // see if lots of deletes happened + if ( maybe_shrink() ) + did_resize = true; + } + if (num_elements >= + (std::numeric_limits::max)() - delta) { + throw std::length_error("resize overflow"); + } + if ( bucket_count() >= HT_MIN_BUCKETS && + (num_elements + delta) <= settings.enlarge_threshold() ) + return did_resize; // we're ok as we are + + // Sometimes, we need to resize just to get rid of all the + // "deleted" buckets that are clogging up the hashtable. So when + // deciding whether to resize, count the deleted buckets (which + // are currently taking up room). But later, when we decide what + // size to resize to, *don't* count deleted buckets, since they + // get discarded during the resize. + size_type needed_size = settings.min_buckets(num_elements + delta, 0); + if ( needed_size <= bucket_count() ) // we have enough buckets + return did_resize; + + size_type resize_to = + settings.min_buckets(num_elements - num_deleted + delta, bucket_count()); + + // When num_deleted is large, we may still grow but we do not want to + // over expand. So we reduce needed_size by a portion of num_deleted + // (the exact portion does not matter). This is especially helpful + // when min_load_factor is zero (no shrink at all) to avoid doubling + // the bucket count to infinity. See also test ResizeWithoutShrink. + needed_size = settings.min_buckets(num_elements - num_deleted / 4 + delta, 0); + if (resize_to < needed_size && // may double resize_to + resize_to < (std::numeric_limits::max)() / 2) { + // This situation means that we have enough deleted elements, + // that once we purge them, we won't actually have needed to + // grow. But we may want to grow anyway: if we just purge one + // element, say, we'll have to grow anyway next time we + // insert. Might as well grow now, since we're already going + // through the trouble of copying (in order to purge the + // deleted elements). + const size_type target = + static_cast(settings.shrink_size(resize_to*2)); + if (num_elements - num_deleted + delta >= target) { + // Good, we won't be below the shrink threshhold even if we double. + resize_to *= 2; + } + } + dense_hashtable tmp(*this, resize_to); + swap(tmp); // now we are tmp + return true; + } + + // We require table be not-NULL and empty before calling this. 
+ void resize_table(size_type /*old_size*/, size_type new_size, + base::true_type) { + table = val_info.realloc_or_die(table, new_size); + } + + void resize_table(size_type old_size, size_type new_size, base::false_type) { + val_info.deallocate(table, old_size); + table = val_info.allocate(new_size); + } + + // Used to actually do the rehashing when we grow/shrink a hashtable + void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted) { + clear_to_size(settings.min_buckets(ht.size(), min_buckets_wanted)); + + // We use a normal iterator to get non-deleted bcks from ht + // We could use insert() here, but since we know there are + // no duplicates and no deleted items, we can be more efficient + assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two + for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) { + size_type num_probes = 0; // how many times we've probed + size_type bucknum; + const size_type bucket_count_minus_one = bucket_count() - 1; + for (bucknum = hash(get_key(*it)) & bucket_count_minus_one; + !test_empty(bucknum); // not empty + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) { + ++num_probes; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + set_value(&table[bucknum], *it); // copies the value to here + num_elements++; + } + settings.inc_num_ht_copies(); + } + + // Required by the spec for hashed associative container + public: + // Though the docs say this should be num_buckets, I think it's much + // more useful as num_elements. As a special feature, calling with + // req_elements==0 will cause us to shrink if we can, saving space. + void resize(size_type req_elements) { // resize to this or larger + if ( settings.consider_shrink() || req_elements == 0 ) + maybe_shrink(); + if ( req_elements > num_elements ) + resize_delta(req_elements - num_elements); + } + + // Get and change the value of shrink_factor and enlarge_factor. The + // description at the beginning of this file explains how to choose + // the values. Setting the shrink parameter to 0.0 ensures that the + // table never shrinks. + void get_resizing_parameters(float* shrink, float* grow) const { + *shrink = settings.shrink_factor(); + *grow = settings.enlarge_factor(); + } + void set_resizing_parameters(float shrink, float grow) { + settings.set_resizing_parameters(shrink, grow); + settings.reset_thresholds(bucket_count()); + } + + // CONSTRUCTORS -- as required by the specs, we take a size, + // but also let you specify a hashfunction, key comparator, + // and key extractor. We also define a copy constructor and =. + // DESTRUCTOR -- needs to free the table + explicit dense_hashtable(size_type expected_max_items_in_table = 0, + const HashFcn& hf = HashFcn(), + const EqualKey& eql = EqualKey(), + const ExtractKey& ext = ExtractKey(), + const SetKey& set = SetKey(), + const Alloc& alloc = Alloc()) + : settings(hf), + key_info(ext, set, eql), + num_deleted(0), + num_elements(0), + num_buckets(expected_max_items_in_table == 0 + ? HT_DEFAULT_STARTING_BUCKETS + : settings.min_buckets(expected_max_items_in_table, 0)), + val_info(alloc_impl(alloc)), + table(NULL) { + // table is NULL until emptyval is set. 
However, we set num_buckets + // here so we know how much space to allocate once emptyval is set + settings.reset_thresholds(bucket_count()); + } + + // As a convenience for resize(), we allow an optional second argument + // which lets you make this new hashtable a different size than ht + dense_hashtable(const dense_hashtable& ht, + size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS) + : settings(ht.settings), + key_info(ht.key_info), + num_deleted(0), + num_elements(0), + num_buckets(0), + val_info(ht.val_info), + table(NULL) { + if (!ht.settings.use_empty()) { + // If use_empty isn't set, copy_from will crash, so we do our own copying. + assert(ht.empty()); + num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted); + settings.reset_thresholds(bucket_count()); + return; + } + settings.reset_thresholds(bucket_count()); + copy_from(ht, min_buckets_wanted); // copy_from() ignores deleted entries + } + + dense_hashtable& operator= (const dense_hashtable& ht) { + if (&ht == this) return *this; // don't copy onto ourselves + if (!ht.settings.use_empty()) { + assert(ht.empty()); + dense_hashtable empty_table(ht); // empty table with ht's thresholds + this->swap(empty_table); + return *this; + } + settings = ht.settings; + key_info = ht.key_info; + set_value(&val_info.emptyval, ht.val_info.emptyval); + // copy_from() calls clear and sets num_deleted to 0 too + copy_from(ht, HT_MIN_BUCKETS); + // we purposefully don't copy the allocator, which may not be copyable + return *this; + } + + ~dense_hashtable() { + if (table) { + destroy_buckets(0, num_buckets); + val_info.deallocate(table, num_buckets); + } + } + + // Many STL algorithms use swap instead of copy constructors + void swap(dense_hashtable& ht) { + std::swap(settings, ht.settings); + std::swap(key_info, ht.key_info); + std::swap(num_deleted, ht.num_deleted); + std::swap(num_elements, ht.num_elements); + std::swap(num_buckets, ht.num_buckets); + { value_type tmp; // for annoying reasons, swap() doesn't work + set_value(&tmp, val_info.emptyval); + set_value(&val_info.emptyval, ht.val_info.emptyval); + set_value(&ht.val_info.emptyval, tmp); + } + std::swap(table, ht.table); + settings.reset_thresholds(bucket_count()); // also resets consider_shrink + ht.settings.reset_thresholds(ht.bucket_count()); + // we purposefully don't swap the allocator, which may not be swap-able + } + + private: + void clear_to_size(size_type new_num_buckets) { + if (!table) { + table = val_info.allocate(new_num_buckets); + } else { + destroy_buckets(0, num_buckets); + if (new_num_buckets != num_buckets) { // resize, if necessary + typedef base::integral_constant >::value> + realloc_ok; + resize_table(num_buckets, new_num_buckets, realloc_ok()); + } + } + assert(table); + fill_range_with_empty(table, table + new_num_buckets); + num_elements = 0; + num_deleted = 0; + num_buckets = new_num_buckets; // our new size + settings.reset_thresholds(bucket_count()); + } + + public: + // It's always nice to be able to clear a table without deallocating it + void clear() { + // If the table is already empty, and the number of buckets is + // already as we desire, there's nothing to do. + const size_type new_num_buckets = settings.min_buckets(0, 0); + if (num_elements == 0 && new_num_buckets == num_buckets) { + return; + } + clear_to_size(new_num_buckets); + } + + // Clear the table without resizing it. 
+ // Mimicks the stl_hashtable's behaviour when clear()-ing in that it + // does not modify the bucket count + void clear_no_resize() { + if (num_elements > 0) { + assert(table); + destroy_buckets(0, num_buckets); + fill_range_with_empty(table, table + num_buckets); + } + // don't consider to shrink before another erase() + settings.reset_thresholds(bucket_count()); + num_elements = 0; + num_deleted = 0; + } + + // LOOKUP ROUTINES + private: + // Returns a pair of positions: 1st where the object is, 2nd where + // it would go if you wanted to insert it. 1st is ILLEGAL_BUCKET + // if object is not found; 2nd is ILLEGAL_BUCKET if it is. + // Note: because of deletions where-to-insert is not trivial: it's the + // first deleted bucket we see, as long as we don't find the key later + std::pair find_position(const key_type &key) const { + size_type num_probes = 0; // how many times we've probed + const size_type bucket_count_minus_one = bucket_count() - 1; + size_type bucknum = hash(key) & bucket_count_minus_one; + size_type insert_pos = ILLEGAL_BUCKET; // where we would insert + while ( 1 ) { // probe until something happens + if ( test_empty(bucknum) ) { // bucket is empty + if ( insert_pos == ILLEGAL_BUCKET ) // found no prior place to insert + return std::pair(ILLEGAL_BUCKET, bucknum); + else + return std::pair(ILLEGAL_BUCKET, insert_pos); + + } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert + if ( insert_pos == ILLEGAL_BUCKET ) + insert_pos = bucknum; + + } else if ( equals(key, get_key(table[bucknum])) ) { + return std::pair(bucknum, ILLEGAL_BUCKET); + } + ++num_probes; // we're doing another probe + bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one; + assert(num_probes < bucket_count() + && "Hashtable is full: an error in key_equal<> or hash<>"); + } + } + + public: + + iterator find(const key_type& key) { + if ( size() == 0 ) return end(); + std::pair pos = find_position(key); + if ( pos.first == ILLEGAL_BUCKET ) // alas, not there + return end(); + else + return iterator(this, table + pos.first, table + num_buckets, false); + } + + const_iterator find(const key_type& key) const { + if ( size() == 0 ) return end(); + std::pair pos = find_position(key); + if ( pos.first == ILLEGAL_BUCKET ) // alas, not there + return end(); + else + return const_iterator(this, table + pos.first, table+num_buckets, false); + } + + // This is a tr1 method: the bucket a given key is in, or what bucket + // it would be put in, if it were to be inserted. Shrug. + size_type bucket(const key_type& key) const { + std::pair pos = find_position(key); + return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first; + } + + // Counts how many elements have key key. For maps, it's either 0 or 1. + size_type count(const key_type &key) const { + std::pair pos = find_position(key); + return pos.first == ILLEGAL_BUCKET ? 0 : 1; + } + + // Likewise, equal_range doesn't really make sense for us. Oh well. 
+ std::pair equal_range(const key_type& key) { + iterator pos = find(key); // either an iterator or end + if (pos == end()) { + return std::pair(pos, pos); + } else { + const iterator startpos = pos++; + return std::pair(startpos, pos); + } + } + std::pair equal_range(const key_type& key) + const { + const_iterator pos = find(key); // either an iterator or end + if (pos == end()) { + return std::pair(pos, pos); + } else { + const const_iterator startpos = pos++; + return std::pair(startpos, pos); + } + } + + + // INSERTION ROUTINES + private: + // Private method used by insert_noresize and find_or_insert. + iterator insert_at(const_reference obj, size_type pos) { + if (size() >= max_size()) { + throw std::length_error("insert overflow"); + } + if ( test_deleted(pos) ) { // just replace if it's been del. + // shrug: shouldn't need to be const. + const_iterator delpos(this, table + pos, table + num_buckets, false); + clear_deleted(delpos); + assert( num_deleted > 0); + --num_deleted; // used to be, now it isn't + } else { + ++num_elements; // replacing an empty bucket + } + set_value(&table[pos], obj); + return iterator(this, table + pos, table + num_buckets, false); + } + + // If you know *this is big enough to hold obj, use this routine + std::pair insert_noresize(const_reference obj) { + // First, double-check we're not inserting delkey or emptyval + assert((!settings.use_empty() || !equals(get_key(obj), + get_key(val_info.emptyval))) + && "Inserting the empty key"); + assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey)) + && "Inserting the deleted key"); + const std::pair pos = find_position(get_key(obj)); + if ( pos.first != ILLEGAL_BUCKET) { // object was already there + return std::pair(iterator(this, table + pos.first, + table + num_buckets, false), + false); // false: we didn't insert + } else { // pos.second says where to put it + return std::pair(insert_at(obj, pos.second), true); + } + } + + // Specializations of insert(it, it) depending on the power of the iterator: + // (1) Iterator supports operator-, resize before inserting + template + void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) { + size_t dist = std::distance(f, l); + if (dist >= (std::numeric_limits::max)()) { + throw std::length_error("insert-range overflow"); + } + resize_delta(static_cast(dist)); + for ( ; dist > 0; --dist, ++f) { + insert_noresize(*f); + } + } + + // (2) Arbitrary iterator, can't tell how much to resize + template + void insert(InputIterator f, InputIterator l, std::input_iterator_tag) { + for ( ; f != l; ++f) + insert(*f); + } + + public: + // This is the normal insert routine, used by the outside world + std::pair insert(const_reference obj) { + resize_delta(1); // adding an object, grow if need be + return insert_noresize(obj); + } + + // When inserting a lot at a time, we specialize on the type of iterator + template + void insert(InputIterator f, InputIterator l) { + // specializes on iterator type + insert(f, l, + typename std::iterator_traits::iterator_category()); + } + + // DefaultValue is a functor that takes a key and returns a value_type + // representing the default value to be inserted if none is found. 
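The two private insert(f, l, tag) overloads above are textbook iterator-category dispatch, and find_or_insert below builds on the same primitives. With forward (or stronger) iterators the range can be measured and the table grown once; single-pass input iterators have to be consumed element by element. A self-contained sketch of the pattern (insert_range is my name for it):

```cpp
#include <cstdio>
#include <cstddef>
#include <iterator>
#include <vector>

template <typename It>
void insert_range(It f, It l, std::forward_iterator_tag) {
  printf("forward: pre-reserving %zu slots\n",
         static_cast<size_t>(std::distance(f, l)));
  // ... resize_delta(distance) once, then insert_noresize() each element ...
}

template <typename It>
void insert_range(It f, It l, std::input_iterator_tag) {
  (void)f; (void)l;
  printf("input: inserting one element at a time, growing on demand\n");
}

template <typename It>
void insert_range(It f, It l) {
  // Pick the overload from the iterator's declared category, as above.
  insert_range(f, l, typename std::iterator_traits<It>::iterator_category());
}

int main() {
  std::vector<int> v(3, 1);
  insert_range(v.begin(), v.end());  // random-access derives from forward
  return 0;
}
```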
+ template + value_type& find_or_insert(const key_type& key) { + // First, double-check we're not inserting emptykey or delkey + assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval))) + && "Inserting the empty key"); + assert((!settings.use_deleted() || !equals(key, key_info.delkey)) + && "Inserting the deleted key"); + const std::pair pos = find_position(key); + DefaultValue default_value; + if ( pos.first != ILLEGAL_BUCKET) { // object was already there + return table[pos.first]; + } else if (resize_delta(1)) { // needed to rehash to make room + // Since we resized, we can't use pos, so recalculate where to insert. + return *insert_noresize(default_value(key)).first; + } else { // no need to rehash, insert right here + return *insert_at(default_value(key), pos.second); + } + } + + + // DELETION ROUTINES + size_type erase(const key_type& key) { + // First, double-check we're not trying to erase delkey or emptyval. + assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval))) + && "Erasing the empty key"); + assert((!settings.use_deleted() || !equals(key, key_info.delkey)) + && "Erasing the deleted key"); + const_iterator pos = find(key); // shrug: shouldn't need to be const + if ( pos != end() ) { + assert(!test_deleted(pos)); // or find() shouldn't have returned it + set_deleted(pos); + ++num_deleted; + settings.set_consider_shrink(true); // will think about shrink after next insert + return 1; // because we deleted one thing + } else { + return 0; // because we deleted nothing + } + } + + // We return the iterator past the deleted item. + void erase(iterator pos) { + if ( pos == end() ) return; // sanity check + if ( set_deleted(pos) ) { // true if object has been newly deleted + ++num_deleted; + settings.set_consider_shrink(true); // will think about shrink after next insert + } + } + + void erase(iterator f, iterator l) { + for ( ; f != l; ++f) { + if ( set_deleted(f) ) // should always be true + ++num_deleted; + } + settings.set_consider_shrink(true); // will think about shrink after next insert + } + + // We allow you to erase a const_iterator just like we allow you to + // erase an iterator. This is in parallel to 'delete': you can delete + // a const pointer just like a non-const pointer. The logic is that + // you can't use the object after it's erased anyway, so it doesn't matter + // if it's const or not. + void erase(const_iterator pos) { + if ( pos == end() ) return; // sanity check + if ( set_deleted(pos) ) { // true if object has been newly deleted + ++num_deleted; + settings.set_consider_shrink(true); // will think about shrink after next insert + } + } + void erase(const_iterator f, const_iterator l) { + for ( ; f != l; ++f) { + if ( set_deleted(f) ) // should always be true + ++num_deleted; + } + settings.set_consider_shrink(true); // will think about shrink after next insert + } + + + // COMPARISON + bool operator==(const dense_hashtable& ht) const { + if (size() != ht.size()) { + return false; + } else if (this == &ht) { + return true; + } else { + // Iterate through the elements in "this" and see if the + // corresponding element is in ht + for ( const_iterator it = begin(); it != end(); ++it ) { + const_iterator it2 = ht.find(get_key(*it)); + if ((it2 == ht.end()) || (*it != *it2)) { + return false; + } + } + return true; + } + } + bool operator!=(const dense_hashtable& ht) const { + return !(*this == ht); + } + + + // I/O + // We support reading and writing hashtables to disk. 
Alas, since + // I don't know how to write a hasher or key_equal, you have to make + // sure everything but the table is the same. We compact before writing. + private: + // Every time the disk format changes, this should probably change too + typedef unsigned long MagicNumberType; + static const MagicNumberType MAGIC_NUMBER = 0x13578642; + + public: + // I/O -- this is an add-on for writing hash table to disk + // + // INPUT and OUTPUT must be either a FILE, *or* a C++ stream + // (istream, ostream, etc) *or* a class providing + // Read(void*, size_t) and Write(const void*, size_t) + // (respectively), which writes a buffer into a stream + // (which the INPUT/OUTPUT instance presumably owns). + + typedef sparsehash_internal::pod_serializer NopointerSerializer; + + // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&) + template + bool serialize(ValueSerializer serializer, OUTPUT *fp) { + squash_deleted(); // so we don't have to worry about delkey + if ( !sparsehash_internal::write_bigendian_number(fp, MAGIC_NUMBER, 4) ) + return false; + if ( !sparsehash_internal::write_bigendian_number(fp, num_buckets, 8) ) + return false; + if ( !sparsehash_internal::write_bigendian_number(fp, num_elements, 8) ) + return false; + // Now write a bitmap of non-empty buckets. + for ( size_type i = 0; i < num_buckets; i += 8 ) { + unsigned char bits = 0; + for ( int bit = 0; bit < 8; ++bit ) { + if ( i + bit < num_buckets && !test_empty(i + bit) ) + bits |= (1 << bit); + } + if ( !sparsehash_internal::write_data(fp, &bits, sizeof(bits)) ) + return false; + for ( int bit = 0; bit < 8; ++bit ) { + if ( bits & (1 << bit) ) { + if ( !serializer(fp, table[i + bit]) ) return false; + } + } + } + return true; + } + + // INPUT: anything we've written an overload of read_data() for. + // ValueSerializer: a functor. operator()(INPUT*, value_type*) + template + bool unserialize(ValueSerializer serializer, INPUT *fp) { + assert(settings.use_empty() && "empty_key not set for read"); + + clear(); // just to be consistent + MagicNumberType magic_read; + if ( !sparsehash_internal::read_bigendian_number(fp, &magic_read, 4) ) + return false; + if ( magic_read != MAGIC_NUMBER ) { + return false; + } + size_type new_num_buckets; + if ( !sparsehash_internal::read_bigendian_number(fp, &new_num_buckets, 8) ) + return false; + clear_to_size(new_num_buckets); + if ( !sparsehash_internal::read_bigendian_number(fp, &num_elements, 8) ) + return false; + + // Read the bitmap of non-empty buckets. + for (size_type i = 0; i < num_buckets; i += 8) { + unsigned char bits; + if ( !sparsehash_internal::read_data(fp, &bits, sizeof(bits)) ) + return false; + for ( int bit = 0; bit < 8; ++bit ) { + if ( i + bit < num_buckets && (bits & (1 << bit)) ) { // not empty + if ( !serializer(fp, &table[i + bit]) ) return false; + } + } + } + return true; + } + + private: + template + class alloc_impl : public A { + public: + typedef typename A::pointer pointer; + typedef typename A::size_type size_type; + + // Convert a normal allocator to one that has realloc_or_die() + alloc_impl(const A& a) : A(a) { } + + // realloc_or_die should only be used when using the default + // allocator (libc_allocator_with_realloc). + pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) { + fprintf(stderr, "realloc_or_die is only supported for " + "libc_allocator_with_realloc\n"); + exit(1); + return NULL; + } + }; + + // A template specialization of alloc_impl for + // libc_allocator_with_realloc that can handle realloc_or_die. 
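Assuming the vendored headers are wired up like upstream sparsehash (include root at the library's src/ directory), the serialize()/unserialize() pair above yields a compact on-disk form: a 4-byte magic number, 8-byte bucket and element counts, one presence bit per bucket, then the occupied values. A sketch of the round trip through a FILE* using the POD-only NopointerSerializer; error checking is omitted and the file name is arbitrary:

```cpp
#include <cstdio>
#include <sparsehash/dense_hash_map>  // vendored path; wiring is an assumption

int main() {
  google::dense_hash_map<int, int> m;
  m.set_empty_key(-1);                 // required before any use
  m[1] = 10;
  m[2] = 20;

  FILE* fp = fopen("table.bin", "wb");
  m.serialize(google::dense_hash_map<int, int>::NopointerSerializer(), fp);
  fclose(fp);

  google::dense_hash_map<int, int> copy;
  copy.set_empty_key(-1);              // unserialize() asserts use_empty()
  fp = fopen("table.bin", "rb");
  copy.unserialize(google::dense_hash_map<int, int>::NopointerSerializer(), fp);
  fclose(fp);

  printf("copy[2] == %d\n", copy[2]);  // prints 20
  return 0;
}
```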
+ template + class alloc_impl > + : public libc_allocator_with_realloc { + public: + typedef typename libc_allocator_with_realloc::pointer pointer; + typedef typename libc_allocator_with_realloc::size_type size_type; + + alloc_impl(const libc_allocator_with_realloc& a) + : libc_allocator_with_realloc(a) { } + + pointer realloc_or_die(pointer ptr, size_type n) { + pointer retval = this->reallocate(ptr, n); + if (retval == NULL) { + fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate " + "%lu elements for ptr %p", static_cast(n), static_cast(ptr)); + exit(1); + } + return retval; + } + }; + + // Package allocator with emptyval to eliminate memory needed for + // the zero-size allocator. + // If new fields are added to this class, we should add them to + // operator= and swap. + class ValInfo : public alloc_impl { + public: + typedef typename alloc_impl::value_type value_type; + + ValInfo(const alloc_impl& a) + : alloc_impl(a), emptyval() { } + ValInfo(const ValInfo& v) + : alloc_impl(v), emptyval(v.emptyval) { } + + value_type emptyval; // which key marks unused entries + }; + + + // Package functors with another class to eliminate memory needed for + // zero-size functors. Since ExtractKey and hasher's operator() might + // have the same function signature, they must be packaged in + // different classes. + struct Settings : + sparsehash_internal::sh_hashtable_settings { + explicit Settings(const hasher& hf) + : sparsehash_internal::sh_hashtable_settings( + hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {} + }; + + // Packages ExtractKey and SetKey functors. + class KeyInfo : public ExtractKey, public SetKey, public EqualKey { + public: + KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq) + : ExtractKey(ek), + SetKey(sk), + EqualKey(eq) { + } + + // We want to return the exact same type as ExtractKey: Key or const Key& + typename ExtractKey::result_type get_key(const_reference v) const { + return ExtractKey::operator()(v); + } + void set_key(pointer v, const key_type& k) const { + SetKey::operator()(v, k); + } + bool equals(const key_type& a, const key_type& b) const { + return EqualKey::operator()(a, b); + } + + // Which key marks deleted entries. + // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!) + typename base::remove_const::type delkey; + }; + + // Utility functions to access the templated operators + size_type hash(const key_type& v) const { + return settings.hash(v); + } + bool equals(const key_type& a, const key_type& b) const { + return key_info.equals(a, b); + } + typename ExtractKey::result_type get_key(const_reference v) const { + return key_info.get_key(v); + } + void set_key(pointer v, const key_type& k) const { + key_info.set_key(v, k); + } + + private: + // Actual data + Settings settings; + KeyInfo key_info; + + size_type num_deleted; // how many occupied buckets are marked deleted + size_type num_elements; + size_type num_buckets; + ValInfo val_info; // holds emptyval, and also the allocator + pointer table; +}; + + +// We need a global swap as well +template +inline void swap(dense_hashtable &x, + dense_hashtable &y) { + x.swap(y); +} + +#undef JUMP_ + +template +const typename dense_hashtable::size_type + dense_hashtable::ILLEGAL_BUCKET; + +// How full we let the table get before we resize. Knuth says .8 is +// good -- higher causes us to probe too much, though saves memory. 
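One note on the ValInfo/Settings/KeyInfo wrappers above before the occupancy constants are pinned down below: they inherit from the hasher, key functors, and allocator rather than storing them, so a stateless functor contributes zero bytes via the empty base optimization. A standalone illustration (the printed sizes are typical for x86-64, not guaranteed by the standard):

```cpp
#include <cstdio>

// A stateless hasher, like the default hashers passed to dense_hashtable.
struct StatelessHash {
  unsigned operator()(int k) const { return static_cast<unsigned>(k); }
};

struct HolderAsMember {
  StatelessHash hash;  // at least 1 byte, padded alongside the pointer
  void* table;
};

struct HolderAsBase : StatelessHash {  // empty base adds zero bytes
  void* table;
};

int main() {
  printf("as member: %zu bytes, as base: %zu bytes\n",
         sizeof(HolderAsMember), sizeof(HolderAsBase));  // e.g. 16 vs 8
  return 0;
}
```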
+// However, we go with .5, getting better performance at the cost of
+// more space (a trade-off densehashtable explicitly chooses to make).
+// Feel free to play around with different values, though, via
+// max_load_factor() and/or set_resizing_parameters().
+template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
+const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 50;
+
+// How empty we let the table get before we resize lower.
+// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing.
+template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
+const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT
+    = static_cast<int>(0.4 *
+                       dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
+
+_END_GOOGLE_NAMESPACE_
+
+#endif /* _DENSEHASHTABLE_H_ */
diff --git a/src/third_party/sparsehash/src/sparsehash/internal/hashtable-common.h b/src/third_party/sparsehash/src/sparsehash/internal/hashtable-common.h
new file mode 100644
index 000000000..bac2b8823
--- /dev/null
+++ b/src/third_party/sparsehash/src/sparsehash/internal/hashtable-common.h
@@ -0,0 +1,381 @@
+// Copyright (c) 2010, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+//
+// Provides classes shared by both sparse and dense hashtable.
+//
+// sh_hashtable_settings has parameters for growing and shrinking
+// a hashtable.  It also packages the zero-size functor (i.e. hasher).
+//
+// Other functions and classes provide common code for serializing
+// and deserializing hashtables to a stream (such as a FILE*).
+
+#ifndef UTIL_GTL_HASHTABLE_COMMON_H_
+#define UTIL_GTL_HASHTABLE_COMMON_H_
+
+#include <sparsehash/internal/sparseconfig.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stddef.h>                  // for size_t
+#include <iosfwd>
+#include <stdexcept>                 // For length_error
+
+_START_GOOGLE_NAMESPACE_
+
+template <bool> struct SparsehashCompileAssert { };
+#define SPARSEHASH_COMPILE_ASSERT(expr, msg) \
+  __attribute__((unused)) typedef SparsehashCompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
+
+namespace sparsehash_internal {
+
+// Adaptor methods for reading/writing data from an INPUT or OUTPUT
+// variable passed to serialize() or unserialize().
For now we +// have implemented INPUT/OUTPUT for FILE*, istream*/ostream* (note +// they are pointers, unlike typical use), or else a pointer to +// something that supports a Read()/Write() method. +// +// For technical reasons, we implement read_data/write_data in two +// stages. The actual work is done in *_data_internal, which takes +// the stream argument twice: once as a template type, and once with +// normal type information. (We only use the second version.) We do +// this because of how C++ picks what function overload to use. If we +// implemented this the naive way: +// bool read_data(istream* is, const void* data, size_t length); +// template read_data(T* fp, const void* data, size_t length); +// C++ would prefer the second version for every stream type except +// istream. However, we want C++ to prefer the first version for +// streams that are *subclasses* of istream, such as istringstream. +// This is not possible given the way template types are resolved. So +// we split the stream argument in two, one of which is templated and +// one of which is not. The specialized functions (like the istream +// version above) ignore the template arg and use the second, 'type' +// arg, getting subclass matching as normal. The 'catch-all' +// functions (the second version above) use the template arg to deduce +// the type, and use a second, void* arg to achieve the desired +// 'catch-all' semantics. + +// ----- low-level I/O for FILE* ---- + +template +inline bool read_data_internal(Ignored*, FILE* fp, + void* data, size_t length) { + return fread(data, length, 1, fp) == 1; +} + +template +inline bool write_data_internal(Ignored*, FILE* fp, + const void* data, size_t length) { + return fwrite(data, length, 1, fp) == 1; +} + +// ----- low-level I/O for iostream ---- + +// We want the caller to be responsible for #including , not +// us, because iostream is a big header! According to the standard, +// it's only legal to delay the instantiation the way we want to if +// the istream/ostream is a template type. So we jump through hoops. +template +inline bool read_data_internal_for_istream(ISTREAM* fp, + void* data, size_t length) { + return fp->read(reinterpret_cast(data), length).good(); +} +template +inline bool read_data_internal(Ignored*, std::istream* fp, + void* data, size_t length) { + return read_data_internal_for_istream(fp, data, length); +} + +template +inline bool write_data_internal_for_ostream(OSTREAM* fp, + const void* data, size_t length) { + return fp->write(reinterpret_cast(data), length).good(); +} +template +inline bool write_data_internal(Ignored*, std::ostream* fp, + const void* data, size_t length) { + return write_data_internal_for_ostream(fp, data, length); +} + +// ----- low-level I/O for custom streams ---- + +// The INPUT type needs to support a Read() method that takes a +// buffer and a length and returns the number of bytes read. +template +inline bool read_data_internal(INPUT* fp, void*, + void* data, size_t length) { + return static_cast(fp->Read(data, length)) == length; +} + +// The OUTPUT type needs to support a Write() operation that takes +// a buffer and a length and returns the number of bytes written. 
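The pass-the-stream-twice trick described above is subtle enough to deserve a tiny demonstration: the first argument only deduces a template parameter, while the second, concretely typed argument drives overload resolution, so subclasses such as std::istringstream still select the std::istream overload. A standalone sketch (read_bytes stands in for the *_data_internal layer):

```cpp
#include <cstdio>
#include <cstddef>
#include <sstream>

// First argument only deduces the template type; the second, typed one
// drives overload resolution, so istream subclasses match this overload.
template <typename Ignored>
bool read_bytes(Ignored*, std::istream* is, void* data, size_t len) {
  return is->read(static_cast<char*>(data), len).good();
}

template <typename Ignored>
bool read_bytes(Ignored*, FILE* fp, void* data, size_t len) {
  return fread(data, len, 1, fp) == 1;
}

template <typename INPUT>
bool read_data(INPUT* fp, void* data, size_t len) {
  return read_bytes(fp, fp, data, len);  // pass the stream twice, as above
}

int main() {
  std::istringstream in("abcd");
  char buf[4];
  // istringstream* converts to istream* only at the second position,
  // so the istream overload wins even though the type is a subclass.
  printf("read ok: %d\n", static_cast<int>(read_data(&in, buf, sizeof(buf))));
  return 0;
}
```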
+template +inline bool write_data_internal(OUTPUT* fp, void*, + const void* data, size_t length) { + return static_cast(fp->Write(data, length)) == length; +} + +// ----- low-level I/O: the public API ---- + +template +inline bool read_data(INPUT* fp, void* data, size_t length) { + return read_data_internal(fp, fp, data, length); +} + +template +inline bool write_data(OUTPUT* fp, const void* data, size_t length) { + return write_data_internal(fp, fp, data, length); +} + +// Uses read_data() and write_data() to read/write an integer. +// length is the number of bytes to read/write (which may differ +// from sizeof(IntType), allowing us to save on a 32-bit system +// and load on a 64-bit system). Excess bytes are taken to be 0. +// INPUT and OUTPUT must match legal inputs to read/write_data (above). +template +bool read_bigendian_number(INPUT* fp, IntType* value, size_t length) { + *value = 0; + unsigned char byte; + // We require IntType to be unsigned or else the shifting gets all screwy. + SPARSEHASH_COMPILE_ASSERT(static_cast(-1) > static_cast(0), + serializing_int_requires_an_unsigned_type); + for (size_t i = 0; i < length; ++i) { + if (!read_data(fp, &byte, sizeof(byte))) return false; + *value |= static_cast(byte) << ((length - 1 - i) * 8); + } + return true; +} + +template +bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) { + unsigned char byte; + // We require IntType to be unsigned or else the shifting gets all screwy. + SPARSEHASH_COMPILE_ASSERT(static_cast(-1) > static_cast(0), + serializing_int_requires_an_unsigned_type); + for (size_t i = 0; i < length; ++i) { + byte = (sizeof(value) <= length-1 - i) + ? 0 : static_cast((value >> ((length-1 - i) * 8)) & 255); + if (!write_data(fp, &byte, sizeof(byte))) return false; + } + return true; +} + +// If your keys and values are simple enough, you can pass this +// serializer to serialize()/unserialize(). "Simple enough" means +// value_type is a POD type that contains no pointers. Note, +// however, we don't try to normalize endianness. +// This is the type used for NopointerSerializer. +template struct pod_serializer { + template + bool operator()(INPUT* fp, value_type* value) const { + return read_data(fp, value, sizeof(*value)); + } + + template + bool operator()(OUTPUT* fp, const value_type& value) const { + return write_data(fp, &value, sizeof(value)); + } +}; + + +// Settings contains parameters for growing and shrinking the table. +// It also packages zero-size functor (ie. hasher). +// +// It does some munging of the hash value in cases where we think +// (fear) the original hash function might not be very good. In +// particular, the default hash of pointers is the identity hash, +// so probably all the low bits are 0. We identify when we think +// we're hashing a pointer, and chop off the low bits. Note this +// isn't perfect: even when the key is a pointer, we can't tell +// for sure that the hash is the identity hash. If it's not, this +// is needless work (and possibly, though not likely, harmful). 
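The hash-munging rationale above is easy to verify: identity-hashed pointers are aligned, so their low bits are constant, and a power-of-two bucket mask would pile everything into a few buckets; dividing by sizeof(void*) shifts the useful bits down, which is what the pointer specialization of hash_munger below does. A quick standalone check:

```cpp
#include <cstdio>
#include <stdint.h>

int main() {
  void* slots[4];  // four consecutive pointer-sized objects
  for (int i = 0; i < 4; ++i) {
    uintptr_t identity = reinterpret_cast<uintptr_t>(&slots[i]);
    uintptr_t munged = identity / sizeof(void*);  // the munged hash
    // With 8 buckets the identity hash reuses one bucket for every slot,
    // while the munged hash spreads consecutive addresses across buckets.
    printf("identity -> bucket %u, munged -> bucket %u\n",
           static_cast<unsigned>(identity & 7),
           static_cast<unsigned>(munged & 7));
  }
  return 0;
}
```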
+
+template <typename Key, typename HashFunc,
+          typename SizeType, int HT_MIN_BUCKETS>
+class sh_hashtable_settings : public HashFunc {
+ public:
+  typedef Key key_type;
+  typedef HashFunc hasher;
+  typedef SizeType size_type;
+
+ public:
+  sh_hashtable_settings(const hasher& hf,
+                        const float ht_occupancy_flt,
+                        const float ht_empty_flt)
+      : hasher(hf),
+        enlarge_threshold_(0),
+        shrink_threshold_(0),
+        consider_shrink_(false),
+        use_empty_(false),
+        use_deleted_(false),
+        num_ht_copies_(0) {
+    set_enlarge_factor(ht_occupancy_flt);
+    set_shrink_factor(ht_empty_flt);
+  }
+
+  size_type hash(const key_type& v) const {
+    // We munge the hash value when we don't trust hasher::operator().
+    return hash_munger<Key>::MungedHash(hasher::operator()(v));
+  }
+
+  float enlarge_factor() const {
+    return enlarge_factor_;
+  }
+  void set_enlarge_factor(float f) {
+    enlarge_factor_ = f;
+  }
+  float shrink_factor() const {
+    return shrink_factor_;
+  }
+  void set_shrink_factor(float f) {
+    shrink_factor_ = f;
+  }
+
+  size_type enlarge_threshold() const {
+    return enlarge_threshold_;
+  }
+  void set_enlarge_threshold(size_type t) {
+    enlarge_threshold_ = t;
+  }
+  size_type shrink_threshold() const {
+    return shrink_threshold_;
+  }
+  void set_shrink_threshold(size_type t) {
+    shrink_threshold_ = t;
+  }
+
+  size_type enlarge_size(size_type x) const {
+    return static_cast<size_type>(x * enlarge_factor_);
+  }
+  size_type shrink_size(size_type x) const {
+    return static_cast<size_type>(x * shrink_factor_);
+  }
+
+  bool consider_shrink() const {
+    return consider_shrink_;
+  }
+  void set_consider_shrink(bool t) {
+    consider_shrink_ = t;
+  }
+
+  bool use_empty() const {
+    return use_empty_;
+  }
+  void set_use_empty(bool t) {
+    use_empty_ = t;
+  }
+
+  bool use_deleted() const {
+    return use_deleted_;
+  }
+  void set_use_deleted(bool t) {
+    use_deleted_ = t;
+  }
+
+  size_type num_ht_copies() const {
+    return static_cast<size_type>(num_ht_copies_);
+  }
+  void inc_num_ht_copies() {
+    ++num_ht_copies_;
+  }
+
+  // Reset the enlarge and shrink thresholds
+  void reset_thresholds(size_type num_buckets) {
+    set_enlarge_threshold(enlarge_size(num_buckets));
+    set_shrink_threshold(shrink_size(num_buckets));
+    // whatever caused us to reset already considered
+    set_consider_shrink(false);
+  }
+
+  // Caller is responsible for calling reset_thresholds() right after
+  // set_resizing_parameters.
+  void set_resizing_parameters(float shrink, float grow) {
+    assert(shrink >= 0.0);
+    assert(grow <= 1.0);
+    if (shrink > grow/2.0f)
+      shrink = grow / 2.0f;    // otherwise we thrash hashtable size
+    set_shrink_factor(shrink);
+    set_enlarge_factor(grow);
+  }
+
+  // This is the smallest size a hashtable can be without being too crowded.
+  // If you like, you can give a min #buckets as well as a min #elts
+  size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) {
+    float enlarge = enlarge_factor();
+    size_type sz = HT_MIN_BUCKETS;             // min buckets allowed
+    while ( sz < min_buckets_wanted ||
+            num_elts >= static_cast<size_type>(sz * enlarge) ) {
+      // This just prevents overflowing size_type, since sz can exceed
+      // max_size() here.
+      if (static_cast<size_type>(sz * 2) < sz) {
+        throw std::length_error("resize overflow");  // protect against overflow
+      }
+      sz *= 2;
+    }
+    return sz;
+  }
+
+ private:
+  template <class HashKey> class hash_munger {
+   public:
+    static size_t MungedHash(size_t hash) {
+      return hash;
+    }
+  };
+  // This matches when the hashtable key is a pointer.
+  template <class HashKey> class hash_munger<HashKey*> {
+   public:
+    static size_t MungedHash(size_t hash) {
+      // TODO(csilvers): consider rotating instead:
+      //    static const int shift = (sizeof(void *) == 4) ?
2 : 3; + // return (hash << (sizeof(hash) * 8) - shift)) | (hash >> shift); + // This matters if we ever change sparse/dense_hash_* to compare + // hashes before comparing actual values. It's speedy on x86. + return hash / sizeof(void*); // get rid of known-0 bits + } + }; + + size_type enlarge_threshold_; // table.size() * enlarge_factor + size_type shrink_threshold_; // table.size() * shrink_factor + float enlarge_factor_; // how full before resize + float shrink_factor_; // how empty before resize + // consider_shrink=true if we should try to shrink before next insert + bool consider_shrink_; + bool use_empty_; // used only by densehashtable, not sparsehashtable + bool use_deleted_; // false until delkey has been set + // num_ht_copies is a counter incremented every Copy/Move + unsigned int num_ht_copies_; +}; + +} // namespace sparsehash_internal + +#undef SPARSEHASH_COMPILE_ASSERT +_END_GOOGLE_NAMESPACE_ + +#endif // UTIL_GTL_HASHTABLE_COMMON_H_ diff --git a/src/third_party/sparsehash/src/sparsehash/internal/libc_allocator_with_realloc.h b/src/third_party/sparsehash/src/sparsehash/internal/libc_allocator_with_realloc.h new file mode 100644 index 000000000..0c1e03d75 --- /dev/null +++ b/src/third_party/sparsehash/src/sparsehash/internal/libc_allocator_with_realloc.h @@ -0,0 +1,119 @@ +// Copyright (c) 2010, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- + +#ifndef UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_ +#define UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_ + +#include +#include // for malloc/realloc/free +#include // for ptrdiff_t +#include // for placement new + +_START_GOOGLE_NAMESPACE_ + +template +class libc_allocator_with_realloc { + public: + typedef T value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + + libc_allocator_with_realloc() {} + libc_allocator_with_realloc(const libc_allocator_with_realloc&) {} + ~libc_allocator_with_realloc() {} + + pointer address(reference r) const { return &r; } + const_pointer address(const_reference r) const { return &r; } + + pointer allocate(size_type n, const_pointer = 0) { + return static_cast(malloc(n * sizeof(value_type))); + } + void deallocate(pointer p, size_type) { + free(p); + } + pointer reallocate(pointer p, size_type n) { + return static_cast(realloc(p, n * sizeof(value_type))); + } + + size_type max_size() const { + return static_cast(-1) / sizeof(value_type); + } + + void construct(pointer p, const value_type& val) { + new(p) value_type(val); + } + void destroy(pointer p) { p->~value_type(); } + + template + libc_allocator_with_realloc(const libc_allocator_with_realloc&) {} + + template + struct rebind { + typedef libc_allocator_with_realloc other; + }; +}; + +// libc_allocator_with_realloc specialization. +template<> +class libc_allocator_with_realloc { + public: + typedef void value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef void* pointer; + typedef const void* const_pointer; + + template + struct rebind { + typedef libc_allocator_with_realloc other; + }; +}; + +template +inline bool operator==(const libc_allocator_with_realloc&, + const libc_allocator_with_realloc&) { + return true; +} + +template +inline bool operator!=(const libc_allocator_with_realloc&, + const libc_allocator_with_realloc&) { + return false; +} + +_END_GOOGLE_NAMESPACE_ + +#endif // UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_ diff --git a/src/third_party/sparsehash/src/sparsehash/template_util.h b/src/third_party/sparsehash/src/sparsehash/template_util.h new file mode 100644 index 000000000..6fec3d092 --- /dev/null +++ b/src/third_party/sparsehash/src/sparsehash/template_util.h @@ -0,0 +1,134 @@ +// Copyright 2005 Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ----
+//
+// Template metaprogramming utility functions.
+//
+// This code is compiled directly on many platforms, including client
+// platforms like Windows, Mac, and embedded systems.  Before making
+// any changes here, make sure that you're not breaking any platforms.
+//
+//
+// The names chosen here reflect those used in tr1 and the boost::mpl
+// library; there are similar operations used in the Loki library as
+// well.  I prefer the boost names for 2 reasons:
+// 1. I think that portions of the Boost libraries are more likely to
+// be included in the c++ standard.
+// 2. It is not impossible that some of the boost libraries will be
+// included in our own build in the future.
+// Both of these outcomes mean that we may be able to directly replace
+// some of these with boost equivalents.
+//
+#ifndef BASE_TEMPLATE_UTIL_H_
+#define BASE_TEMPLATE_UTIL_H_
+
+#include <sparsehash/internal/sparseconfig.h>
+_START_GOOGLE_NAMESPACE_
+
+// Types small_ and big_ are guaranteed such that sizeof(small_) <
+// sizeof(big_)
+typedef char small_;
+
+struct big_ {
+  char dummy[2];
+};
+
+// Identity metafunction.
+template <class T>
+struct identity_ {
+  typedef T type;
+};
+
+// integral_constant, defined in tr1, is a wrapper for an integer
+// value.  We don't really need this generality; we could get away
+// with hardcoding the integer type to bool.  We use the fully
+// general integer_constant for compatibility with tr1.
+
+template <class T, T v>
+struct integral_constant {
+  static const T value = v;
+  typedef T value_type;
+  typedef integral_constant<T, v> type;
+};
+
+template <class T, T v> const T integral_constant<T, v>::value;
+
+
+// Abbreviations: true_type and false_type are structs that represent boolean
+// true and false values. Also define the boost::mpl versions of those names,
+// true_ and false_.
+typedef integral_constant<bool, true>  true_type;
+typedef integral_constant<bool, false> false_type;
+typedef true_type  true_;
+typedef false_type false_;
+
+// if_ is a templatized conditional statement.
+// if_<cond, A, B> is a compile time evaluation of cond.
+// if_<>::type contains A if cond is true, B otherwise.
+template <bool cond, typename A, typename B>
+struct if_ {
+  typedef A type;
+};
+
+template <typename A, typename B>
+struct if_<false, A, B> {
+  typedef B type;
+};
+
+
+// type_equals_ is a template type comparator, similar to Loki IsSameType.
+// type_equals_<A, B>::value is true iff "A" is the same type as "B".
+//
+// New code should prefer base::is_same, defined in base/type_traits.h.
+// It is functionally identical, but is_same is the standard spelling.
+template <typename A, typename B>
+struct type_equals_ : public false_ {
+};
+
+template <typename A>
+struct type_equals_<A, A> : public true_ {
+};
+
+// and_ is a template && operator.
+// and_<A, B>::value evaluates "A::value && B::value".
+template <typename A, typename B>
+struct and_ : public integral_constant<bool, (A::value && B::value)> {
+};
+
+// or_ is a template || operator.
+// or_<A, B>::value evaluates "A::value || B::value".
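These helpers (together with the or_ metafunction whose definition follows) are a pre-C++11 vocabulary for compile-time booleans and type selection; each now has a std:: equivalent, noted in the comments of this standalone restatement:

```cpp
#include <cstdio>

template <class T, T v> struct integral_constant {  // std::integral_constant
  static const T value = v;
};
typedef integral_constant<bool, true>  true_type;   // std::true_type
typedef integral_constant<bool, false> false_type;  // std::false_type

template <bool cond, class A, class B> struct if_ { // std::conditional
  typedef A type;
};
template <class A, class B> struct if_<false, A, B> {
  typedef B type;
};

int main() {
  // Select a type from a compile-time condition, e.g. an index wide
  // enough for the platform's pointers.
  if_<(sizeof(void*) > 4), unsigned long, unsigned int>::type index = 0;
  ++index;
  printf("index uses %zu bytes; true_type::value == %d\n",
         sizeof(index), static_cast<int>(true_type::value));
  return 0;
}
```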
+template +struct or_ : public integral_constant { +}; + + +_END_GOOGLE_NAMESPACE_ + +#endif // BASE_TEMPLATE_UTIL_H_ diff --git a/src/third_party/sparsehash/src/sparsehash/type_traits.h b/src/third_party/sparsehash/src/sparsehash/type_traits.h new file mode 100644 index 000000000..f909cf9a3 --- /dev/null +++ b/src/third_party/sparsehash/src/sparsehash/type_traits.h @@ -0,0 +1,342 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ---- +// +// This code is compiled directly on many platforms, including client +// platforms like Windows, Mac, and embedded systems. Before making +// any changes here, make sure that you're not breaking any platforms. +// +// Define a small subset of tr1 type traits. The traits we define are: +// is_integral +// is_floating_point +// is_pointer +// is_enum +// is_reference +// is_pod +// has_trivial_constructor +// has_trivial_copy +// has_trivial_assign +// has_trivial_destructor +// remove_const +// remove_volatile +// remove_cv +// remove_reference +// add_reference +// remove_pointer +// is_same +// is_convertible +// We can add more type traits as required. + +#ifndef BASE_TYPE_TRAITS_H_ +#define BASE_TYPE_TRAITS_H_ + +#include +#include // For pair + +#include // For true_type and false_type + +_START_GOOGLE_NAMESPACE_ + +template struct is_integral; +template struct is_floating_point; +template struct is_pointer; +// MSVC can't compile this correctly, and neither can gcc 3.3.5 (at least) +#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3) +// is_enum uses is_convertible, which is not available on MSVC. 
+template struct is_enum; +#endif +template struct is_reference; +template struct is_pod; +template struct has_trivial_constructor; +template struct has_trivial_copy; +template struct has_trivial_assign; +template struct has_trivial_destructor; +template struct remove_const; +template struct remove_volatile; +template struct remove_cv; +template struct remove_reference; +template struct add_reference; +template struct remove_pointer; +template struct is_same; +#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3) +template struct is_convertible; +#endif + +// is_integral is false except for the built-in integer types. A +// cv-qualified type is integral if and only if the underlying type is. +template struct is_integral : false_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +#if defined(_MSC_VER) +// wchar_t is not by default a distinct type from unsigned short in +// Microsoft C. +// See http://msdn2.microsoft.com/en-us/library/dh8che7s(VS.80).aspx +template<> struct is_integral<__wchar_t> : true_type { }; +#else +template<> struct is_integral : true_type { }; +#endif +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +#ifdef HAVE_LONG_LONG +template<> struct is_integral : true_type { }; +template<> struct is_integral : true_type { }; +#endif +template struct is_integral : is_integral { }; +template struct is_integral : is_integral { }; +template struct is_integral : is_integral { }; + +// is_floating_point is false except for the built-in floating-point types. +// A cv-qualified type is integral if and only if the underlying type is. +template struct is_floating_point : false_type { }; +template<> struct is_floating_point : true_type { }; +template<> struct is_floating_point : true_type { }; +template<> struct is_floating_point : true_type { }; +template struct is_floating_point + : is_floating_point { }; +template struct is_floating_point + : is_floating_point { }; +template struct is_floating_point + : is_floating_point { }; + +// is_pointer is false except for pointer types. A cv-qualified type (e.g. +// "int* const", as opposed to "int const*") is cv-qualified if and only if +// the underlying type is. +template struct is_pointer : false_type { }; +template struct is_pointer : true_type { }; +template struct is_pointer : is_pointer { }; +template struct is_pointer : is_pointer { }; +template struct is_pointer : is_pointer { }; + +#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3) + +namespace internal { + +template struct is_class_or_union { + template static small_ tester(void (U::*)()); + template static big_ tester(...); + static const bool value = sizeof(tester(0)) == sizeof(small_); +}; + +// is_convertible chokes if the first argument is an array. That's why +// we use add_reference here. +template struct is_enum_impl + : is_convertible::type, int> { }; + +template struct is_enum_impl : false_type { }; + +} // namespace internal + +// Specified by TR1 [4.5.1] primary type categories. 
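The internal::is_class_or_union helper above is the classic sizeof-based detection trick: the first tester() overload is viable only when U can have a member-function pointer (classes and unions), and sizeof asks which overload would be chosen without ever calling anything. Standalone:

```cpp
#include <cstdio>

typedef char small_;
struct big_ { char dummy[2]; };

// Overload resolution, inspected by sizeof, stands in for a real call:
// the member-function-pointer parameter only substitutes for class types.
template <class T> struct is_class_or_union {
  template <class U> static small_ tester(void (U::*)());
  template <class U> static big_ tester(...);
  static const bool value = sizeof(tester<T>(0)) == sizeof(small_);
};

struct SomeClass { };

int main() {
  printf("int: %d, SomeClass: %d\n",
         static_cast<int>(is_class_or_union<int>::value),        // 0
         static_cast<int>(is_class_or_union<SomeClass>::value)); // 1
  return 0;
}
```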
+
+// Implementation note:
+//
+// Each type is either void, integral, floating point, array, pointer,
+// reference, member object pointer, member function pointer, enum,
+// union or class. Out of these, only integral, floating point, reference,
+// class and enum types are potentially convertible to int. Therefore,
+// if a type is not a reference, integral, floating point or class and
+// is convertible to int, it's an enum. Adding cv-qualification to a type
+// does not change whether it's an enum.
+//
+// The is-convertible-to-int check is done only if all other checks pass,
+// because it can't be used with some types (e.g. void or classes with
+// inaccessible conversion operators).
+template <class T> struct is_enum
+    : internal::is_enum_impl<
+          is_same<T, void>::value ||
+              is_integral<T>::value ||
+              is_floating_point<T>::value ||
+              is_reference<T>::value ||
+              internal::is_class_or_union<T>::value,
+          T> { };
+
+template <class T> struct is_enum<const T> : is_enum<T> { };
+template <class T> struct is_enum<volatile T> : is_enum<T> { };
+template <class T> struct is_enum<const volatile T> : is_enum<T> { };
+
+#endif
+
+// is_reference is false except for reference types.
+template <typename T> struct is_reference : false_type {};
+template <typename T> struct is_reference<T&> : true_type {};
+
+
+// We can't get is_pod right without compiler help, so fail conservatively.
+// We will assume it's false except for arithmetic types, enumerations,
+// pointers and cv-qualified versions thereof. Note that std::pair<T, U>
+// is not a POD even if T and U are PODs.
+template <class T> struct is_pod
+ : integral_constant<bool, (is_integral<T>::value ||
+                            is_floating_point<T>::value ||
+#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3)
+                            // is_enum is not available on MSVC.
+                            is_enum<T>::value ||
+#endif
+                            is_pointer<T>::value)> { };
+template <class T> struct is_pod<const T> : is_pod<T> { };
+template <class T> struct is_pod<volatile T> : is_pod<T> { };
+template <class T> struct is_pod<const volatile T> : is_pod<T> { };
+
+
+// We can't get has_trivial_constructor right without compiler help, so
+// fail conservatively.  We will assume it's false except for: (1) types
+// for which is_pod is true. (2) std::pair of types with trivial
+// constructors. (3) array of a type with a trivial constructor.
+// (4) const versions thereof.
+template <class T> struct has_trivial_constructor : is_pod<T> { };
+template <class T, class U> struct has_trivial_constructor<std::pair<T, U> >
+  : integral_constant<bool,
+                      (has_trivial_constructor<T>::value &&
+                       has_trivial_constructor<U>::value)> { };
+template <class A, int N> struct has_trivial_constructor<A[N]>
+  : has_trivial_constructor<A> { };
+template <class T> struct has_trivial_constructor<const T>
+  : has_trivial_constructor<T> { };
+
+// We can't get has_trivial_copy right without compiler help, so fail
+// conservatively. We will assume it's false except for: (1) types
+// for which is_pod is true. (2) std::pair of types with trivial copy
+// constructors. (3) array of a type with a trivial copy constructor.
+// (4) const versions thereof.
+template <class T> struct has_trivial_copy : is_pod<T> { };
+template <class T, class U> struct has_trivial_copy<std::pair<T, U> >
+  : integral_constant<bool,
+                      (has_trivial_copy<T>::value &&
+                       has_trivial_copy<U>::value)> { };
+template <class A, int N> struct has_trivial_copy<A[N]>
+  : has_trivial_copy<A> { };
+template <class T> struct has_trivial_copy<const T> : has_trivial_copy<T> { };
+
+// We can't get has_trivial_assign right without compiler help, so fail
+// conservatively. We will assume it's false except for: (1) types
+// for which is_pod is true. (2) std::pair of types with trivial copy
+// constructors. (3) array of a type with a trivial assign constructor.
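Because these traits guess without compiler intrinsics, they deliberately under-report (the has_trivial_assign definitions follow below): a plain struct of scalars is trivially copyable to the compiler, yet the hand-rolled is_pod above calls it false and skips bitwise shortcuts. A quick contrast against the compiler-backed C++11 trait, assuming a C++11 toolchain for the check itself:

```cpp
#include <cstdio>
#include <type_traits>

struct Bucket { int key; double value; };  // trivially copyable in reality

int main() {
  // The compiler-backed C++11 trait knows Bucket is trivially copyable...
  printf("C++11 says trivially copyable: %d\n",
         static_cast<int>(std::is_trivially_copyable<Bucket>::value));
  // ...whereas the conservative is_pod above only sees "a class type" and
  // reports false, so sparsehash avoids memcpy-style optimizations for it.
  return 0;
}
```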
+template struct has_trivial_assign : is_pod { }; +template struct has_trivial_assign > + : integral_constant::value && + has_trivial_assign::value)> { }; +template struct has_trivial_assign + : has_trivial_assign { }; + +// We can't get has_trivial_destructor right without compiler help, so +// fail conservatively. We will assume it's false except for: (1) types +// for which is_pod is true. (2) std::pair of types with trivial +// destructors. (3) array of a type with a trivial destructor. +// (4) const versions thereof. +template struct has_trivial_destructor : is_pod { }; +template struct has_trivial_destructor > + : integral_constant::value && + has_trivial_destructor::value)> { }; +template struct has_trivial_destructor + : has_trivial_destructor { }; +template struct has_trivial_destructor + : has_trivial_destructor { }; + +// Specified by TR1 [4.7.1] +template struct remove_const { typedef T type; }; +template struct remove_const { typedef T type; }; +template struct remove_volatile { typedef T type; }; +template struct remove_volatile { typedef T type; }; +template struct remove_cv { + typedef typename remove_const::type>::type type; +}; + + +// Specified by TR1 [4.7.2] Reference modifications. +template struct remove_reference { typedef T type; }; +template struct remove_reference { typedef T type; }; + +template struct add_reference { typedef T& type; }; +template struct add_reference { typedef T& type; }; + +// Specified by TR1 [4.7.4] Pointer modifications. +template struct remove_pointer { typedef T type; }; +template struct remove_pointer { typedef T type; }; +template struct remove_pointer { typedef T type; }; +template struct remove_pointer { typedef T type; }; +template struct remove_pointer { + typedef T type; }; + +// Specified by TR1 [4.6] Relationships between types +template struct is_same : public false_type { }; +template struct is_same : public true_type { }; + +// Specified by TR1 [4.6] Relationships between types +#if !defined(_MSC_VER) && !(defined(__GNUC__) && __GNUC__ <= 3) +namespace internal { + +// This class is an implementation detail for is_convertible, and you +// don't need to know how it works to use is_convertible. For those +// who care: we declare two different functions, one whose argument is +// of type To and one with a variadic argument list. We give them +// return types of different size, so we can use sizeof to trick the +// compiler into telling us which function it would have chosen if we +// had called it with an argument of type From. See Alexandrescu's +// _Modern C++ Design_ for more details on this sort of trick. + +template +struct ConvertHelper { + static small_ Test(To); + static big_ Test(...); + static From Create(); +}; +} // namespace internal + +// Inherits from true_type if From is convertible to To, false_type otherwise. +template +struct is_convertible + : integral_constant::Test( + internal::ConvertHelper::Create())) + == sizeof(small_)> { +}; +#endif + +_END_GOOGLE_NAMESPACE_ + +// Right now these macros are no-ops, and mostly just document the fact +// these types are PODs, for human use. They may be made more contentful +// later. The typedef is just to make it legal to put a semicolon after +// these macros. 
+#define DECLARE_POD(TypeName) typedef int Dummy_Type_For_DECLARE_POD +#define DECLARE_NESTED_POD(TypeName) DECLARE_POD(TypeName) +#define PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT(TemplateName) \ + typedef int Dummy_Type_For_PROPAGATE_POD_FROM_TEMPLATE_ARGUMENT +#define ENFORCE_POD(TypeName) typedef int Dummy_Type_For_ENFORCE_POD + +#endif // BASE_TYPE_TRAITS_H_ diff --git a/src/token_aware_policy.cpp b/src/token_aware_policy.cpp index 06e0c1033..3615573d1 100644 --- a/src/token_aware_policy.cpp +++ b/src/token_aware_policy.cpp @@ -32,7 +32,7 @@ static inline bool contains(const CopyOnWriteHostVec& replicas, const Address& a QueryPlan* TokenAwarePolicy::new_query_plan(const std::string& connected_keyspace, const Request* request, - const TokenMap& token_map, + const TokenMap* token_map, Request::EncodingCache* cache) { if (request != NULL) { switch (request->opcode()) { @@ -46,12 +46,14 @@ QueryPlan* TokenAwarePolicy::new_query_plan(const std::string& connected_keyspac ? connected_keyspace : statement_keyspace; std::string routing_key; if (rr->get_routing_key(&routing_key, cache) && !keyspace.empty()) { - CopyOnWriteHostVec replicas = token_map.get_replicas(keyspace, routing_key); - if (!replicas->empty()) { - return new TokenAwareQueryPlan(child_policy_.get(), - child_policy_->new_query_plan(connected_keyspace, request, token_map, cache), - replicas, - index_++); + if (token_map != NULL) { + CopyOnWriteHostVec replicas = token_map->get_replicas(keyspace, routing_key); + if (replicas && !replicas->empty()) { + return new TokenAwareQueryPlan(child_policy_.get(), + child_policy_->new_query_plan(connected_keyspace, request, token_map, cache), + replicas, + index_++); + } } } break; diff --git a/src/token_aware_policy.hpp b/src/token_aware_policy.hpp index b18cdab45..e1ea6d191 100644 --- a/src/token_aware_policy.hpp +++ b/src/token_aware_policy.hpp @@ -34,7 +34,7 @@ class TokenAwarePolicy : public ChainedLoadBalancingPolicy { virtual QueryPlan* new_query_plan(const std::string& connected_keyspace, const Request* request, - const TokenMap& token_map, + const TokenMap* token_map, Request::EncodingCache* cache); LoadBalancingPolicy* new_instance() { return new TokenAwarePolicy(child_policy_->new_instance()); } diff --git a/src/token_map.cpp b/src/token_map.cpp index 9c6a8ef06..89857e703 100644 --- a/src/token_map.cpp +++ b/src/token_map.cpp @@ -16,279 +16,21 @@ #include "token_map.hpp" -#include "constants.hpp" -#include "logger.hpp" -#include "md5.hpp" -#include "murmur3.hpp" -#include "scoped_ptr.hpp" -#include "utils.hpp" - -#include - -#include -#include +#include "token_map_impl.hpp" namespace cass { -static const CopyOnWriteHostVec NO_REPLICAS(new HostVec()); - -static int64_t parse_int64(const char* p, size_t n) { - int c; - const char* s = p; - for (; n != 0 && isspace(c = *s); ++s, --n) {} - - if (n == 0) { - return 0; - } - - int64_t sign = 1; - if (c == '-') { - sign = -1; - ++s; --n; - } - - int64_t value = 0; - for (; n != 0 && isdigit(c = *s); ++s, --n) { - value *= 10; - value += c - '0'; - } - - return sign * value; -} - -static void parse_int128(const char* p, size_t n, uint8_t* output) { - // no sign handling because C* uses [0, 2^127] - int c; - const char* s = p; - - for (; n != 0 && isspace(c = *s); ++s, --n) {} - - if (n == 0) { - memset(output, 0, sizeof(uint64_t) * 2); - return; - } - - uint64_t hi = 0; - uint64_t lo = 0; - uint64_t hi_tmp; - uint64_t lo_tmp; - uint64_t lo_tmp2; - for (; n != 0 && isdigit(c = *s); ++s, --n) { - hi_tmp = hi; - lo_tmp = lo; - - //value *= 10; - 
lo = lo_tmp << 1; - hi = (lo_tmp >> 63) + (hi_tmp << 1); - lo_tmp2 = lo; - lo += lo_tmp << 3; - hi += (lo_tmp >> 61) + (hi_tmp << 3) + (lo < lo_tmp2 ? 1 : 0); - - //value += c - '0'; - lo_tmp = lo; - lo += c - '0'; - hi += (lo < lo_tmp) ? 1 : 0; - } - - encode_uint64(output, hi); - encode_uint64(output + sizeof(uint64_t), lo); -} - -void TokenMap::clear() { - mapped_addresses_.clear(); - token_map_.clear(); - keyspace_replica_map_.clear(); - keyspace_strategy_map_.clear(); -} - -void TokenMap::build() { - if (!partitioner_) { - LOG_WARN("No partitioner set, not building map"); - return; - } - - map_replicas(true); -} - -void TokenMap::set_partitioner(const std::string& partitioner_class) { - // Only set the partition once - if (partitioner_) return; - - if (ends_with(partitioner_class, Murmur3Partitioner::PARTITIONER_CLASS)) { - partitioner_.reset(new Murmur3Partitioner()); - } else if (ends_with(partitioner_class, RandomPartitioner::PARTITIONER_CLASS)) { - partitioner_.reset(new RandomPartitioner()); - } else if (ends_with(partitioner_class, ByteOrderedPartitioner::PARTITIONER_CLASS)) { - partitioner_.reset(new ByteOrderedPartitioner()); +TokenMap* TokenMap::from_partitioner(StringRef partitioner) { + if (ends_with(partitioner, Murmur3Partitioner::name())) { + return new TokenMapImpl(); + } else if (ends_with(partitioner, RandomPartitioner::name())) { + return new TokenMapImpl(); + } else if (ends_with(partitioner, ByteOrderedPartitioner::name())) { + return new TokenMapImpl(); } else { - LOG_WARN("Unsupported partitioner class '%s'", partitioner_class.c_str()); + LOG_WARN("Unsupported partitioner class '%s'", partitioner.to_string().c_str()); + return NULL; } } -void TokenMap::update_host(SharedRefPtr& host, const TokenStringList& token_strings) { - if (!partitioner_) return; - - // There's a chance to avoid purging if tokens are the same as existing; deemed - // not worth the complexity because: - // 1.) Updates should only happen on "new" host, or "moved" - // 2.) 
Moving should only occur on non-vnode clusters, in which case the - // token map is relatively small and easy to purge/repopulate - purge_address(host->address()); - - for (TokenStringList::const_iterator i = token_strings.begin(); - i != token_strings.end(); ++i) { - token_map_[partitioner_->token_from_string_ref(*i)] = host; - } - mapped_addresses_.insert(host->address()); - map_replicas(); -} - -void TokenMap::remove_host(SharedRefPtr& host) { - if (!partitioner_) return; - - if (purge_address(host->address())) { - map_replicas(); - } -} - -void TokenMap::update_keyspace(const std::string& ks_name, const KeyspaceMetadata& ks_meta) { - if (!partitioner_) return; - - KeyspaceStrategyMap::iterator i = keyspace_strategy_map_.find(ks_name); - if (i == keyspace_strategy_map_.end() || !i->second->equal(ks_meta)) { - SharedRefPtr strategy(ReplicationStrategy::from_keyspace_meta(ks_meta)); - map_keyspace_replicas(ks_name, strategy); - if (i == keyspace_strategy_map_.end()) { - keyspace_strategy_map_[ks_name] = strategy; - } else { - i->second = strategy; - } - } -} - -void TokenMap::drop_keyspace(const std::string& ks_name) { - if (!partitioner_) return; - - keyspace_replica_map_.erase(ks_name); - keyspace_strategy_map_.erase(ks_name); -} - -const CopyOnWriteHostVec& TokenMap::get_replicas(const std::string& ks_name, - const std::string& routing_key) const { - if (!partitioner_) return NO_REPLICAS; - - KeyspaceReplicaMap::const_iterator tokens_it = keyspace_replica_map_.find(ks_name); - if (tokens_it != keyspace_replica_map_.end()) { - const TokenReplicaMap& tokens_to_replicas = tokens_it->second; - - const Token t = partitioner_->hash(reinterpret_cast(routing_key.data()), routing_key.size()); - TokenReplicaMap::const_iterator replicas_it = tokens_to_replicas.upper_bound(t); - - if (replicas_it != tokens_to_replicas.end()) { - return replicas_it->second; - } else { - if (!tokens_to_replicas.empty()) { - return tokens_to_replicas.begin()->second; - } - } - } - return NO_REPLICAS; -} - -void TokenMap::set_replication_strategy(const std::string& ks_name, - const SharedRefPtr& strategy) { - keyspace_strategy_map_[ks_name] = strategy; - map_keyspace_replicas(ks_name, strategy); -} - -void TokenMap::map_replicas(bool force) { - if (keyspace_replica_map_.empty() && !force) {// do nothing ahead of first build - return; - } - for (KeyspaceStrategyMap::const_iterator i = keyspace_strategy_map_.begin(); - i != keyspace_strategy_map_.end(); ++i) { - map_keyspace_replicas(i->first, i->second, force); - } -} - -void TokenMap::map_keyspace_replicas(const std::string& ks_name, - const SharedRefPtr& strategy, - bool force) { - if (keyspace_replica_map_.empty() && !force) {// do nothing ahead of first build - return; - } - strategy->tokens_to_replicas(token_map_, &keyspace_replica_map_[ks_name]); -} - -bool TokenMap::purge_address(const Address& addr) { - AddressSet::iterator addr_itr = mapped_addresses_.find(addr); - if (addr_itr == mapped_addresses_.end()) { - return false; - } - - TokenHostMap::iterator i = token_map_.begin(); - while (i != token_map_.end()) { - if (addr.compare(i->second->address()) == 0) { - TokenHostMap::iterator to_erase = i++; - token_map_.erase(to_erase); - } else { - ++i; - } - } - - mapped_addresses_.erase(addr_itr); - return true; -} - - -const std::string Murmur3Partitioner::PARTITIONER_CLASS("Murmur3Partitioner"); - -Token Murmur3Partitioner::token_from_string_ref(const StringRef& token_string_ref) const { - Token token(sizeof(int64_t), 0); - int64_t token_value = 
parse_int64(token_string_ref.data(), token_string_ref.size()); - encode_uint64(&token[0], static_cast(token_value) + CASS_UINT64_MAX / 2); - return token; -} - -Token Murmur3Partitioner::hash(const uint8_t* data, size_t size) const { - Token token(sizeof(int64_t), 0); - int64_t token_value = MurmurHash3_x64_128(data, size, 0); - if (token_value == CASS_INT64_MIN) { - token_value = CASS_INT64_MAX; - } - encode_uint64(&token[0], static_cast(token_value) + CASS_UINT64_MAX / 2); - return token; -} - -const std::string RandomPartitioner::PARTITIONER_CLASS("RandomPartitioner"); - -Token RandomPartitioner::token_from_string_ref(const StringRef& token_string_ref) const { - Token token(sizeof(uint64_t) * 2, 0); - parse_int128(token_string_ref.data(), token_string_ref.size(), &token[0]); - return token; -} - -Token RandomPartitioner::hash(const uint8_t* data, size_t size) const { - Md5 hash; - hash.update(data, size); - - Token token(sizeof(uint64_t) * 2, 0); - hash.final(&token[0]); - return token; -} - -const std::string ByteOrderedPartitioner::PARTITIONER_CLASS("ByteOrderedPartitioner"); - -Token ByteOrderedPartitioner::token_from_string_ref(const StringRef& token_string_ref) const { - const uint8_t* data = reinterpret_cast(token_string_ref.data()); - size_t size = token_string_ref.size(); - return Token(data, data + size); -} - -Token ByteOrderedPartitioner::hash(const uint8_t* data, size_t size) const { - const uint8_t* first = static_cast(data); - Token token(first, first + size); - return token; -} - -} +} // namespace cass diff --git a/src/token_map.hpp b/src/token_map.hpp index 0be75bab2..592c0282d 100644 --- a/src/token_map.hpp +++ b/src/token_map.hpp @@ -17,93 +17,37 @@ #ifndef __CASS_TOKEN_MAP_HPP_INCLUDED__ #define __CASS_TOKEN_MAP_HPP_INCLUDED__ -#include "buffer.hpp" -#include "copy_on_write_ptr.hpp" #include "host.hpp" -#include "replication_strategy.hpp" -#include "scoped_ptr.hpp" -#include "string_ref.hpp" -#include -#include +#include namespace cass { -typedef std::vector TokenStringList; - -class Partitioner { -public: - virtual ~Partitioner() {} - virtual Token token_from_string_ref(const StringRef& token_string_ref) const = 0; - virtual Token hash(const uint8_t* data, size_t size) const = 0; -}; +class VersionNumber; +class Value; +class ResultResponse; +class StringRef; class TokenMap { public: - virtual ~TokenMap() {} - - void clear(); - void build(); - - void set_partitioner(const std::string& partitioner_class); - void update_host(SharedRefPtr& host, const TokenStringList& token_strings); - void remove_host(SharedRefPtr& host); - void update_keyspace(const std::string& ks_name, const KeyspaceMetadata& ks_meta); - void drop_keyspace(const std::string& ks_name); - const CopyOnWriteHostVec& get_replicas(const std::string& ks_name, - const std::string& routing_key) const; - - // Testing only - void set_replication_strategy(const std::string& ks_name, - const SharedRefPtr& strategy); - -private: - void map_replicas(bool force = false); - void map_keyspace_replicas(const std::string& ks_name, - const SharedRefPtr& strategy, - bool force = false); - bool purge_address(const Address& addr); - -protected: - TokenHostMap token_map_; - - typedef std::map KeyspaceReplicaMap; - KeyspaceReplicaMap keyspace_replica_map_; - - typedef std::map > KeyspaceStrategyMap; - KeyspaceStrategyMap keyspace_strategy_map_; - - typedef std::set
AddressSet; - AddressSet mapped_addresses_; - - ScopedPtr partitioner_; -}; - - -class Murmur3Partitioner : public Partitioner { -public: - static const std::string PARTITIONER_CLASS; + static TokenMap* from_partitioner(StringRef partitioner); - virtual Token token_from_string_ref(const StringRef& token_string_ref) const; - virtual Token hash(const uint8_t* data, size_t size) const; -}; + virtual ~TokenMap() { } + virtual void add_host(const Host::Ptr& host, const Value* tokens) = 0; + virtual void update_host_and_build(const Host::Ptr& host, const Value* tokens) = 0; + virtual void remove_host_and_build(const Host::Ptr& host) = 0; + virtual void clear_tokens_and_hosts() = 0; -class RandomPartitioner : public Partitioner { -public: - static const std::string PARTITIONER_CLASS; - - virtual Token token_from_string_ref(const StringRef& token_string_ref) const; - virtual Token hash(const uint8_t* data, size_t size) const; -}; + virtual void add_keyspaces(const VersionNumber& cassandra_version, ResultResponse* result) = 0; + virtual void update_keyspaces_and_build(const VersionNumber& cassandra_version, ResultResponse* result) = 0; + virtual void drop_keyspace(const std::string& keyspace_name) = 0; + virtual void clear_replicas_and_strategies() = 0; + virtual void build() = 0; -class ByteOrderedPartitioner : public Partitioner { -public: - static const std::string PARTITIONER_CLASS; - - virtual Token token_from_string_ref(const StringRef& token_string_ref) const; - virtual Token hash(const uint8_t* data, size_t size) const; + virtual const CopyOnWriteHostVec& get_replicas(const std::string& keyspace_name, + const std::string& routing_key) const = 0; }; } // namespace cass diff --git a/src/token_map_impl.cpp b/src/token_map_impl.cpp new file mode 100644 index 000000000..8289fe415 --- /dev/null +++ b/src/token_map_impl.cpp @@ -0,0 +1,121 @@ +/* + Copyright (c) 2014-2016 DataStax + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "token_map_impl.hpp" + +#include "md5.hpp" +#include "murmur3.hpp" + +namespace cass { + +static int64_t parse_int64(const char* p, size_t n) { + int c; + const char* s = p; + for (; n != 0 && isspace(c = *s); ++s, --n) {} + + if (n == 0) { + return 0; + } + + int64_t sign = 1; + if (c == '-') { + sign = -1; + ++s; --n; + } + + int64_t value = 0; + for (; n != 0 && isdigit(c = *s); ++s, --n) { + value *= 10; + value += c - '0'; + } + + return sign * value; +} + +static void parse_int128(const char* p, size_t n, uint64_t* h, uint64_t* l) { + // no sign handling because C* uses [0, 2^127] + int c; + const char* s = p; + + for (; n != 0 && isspace(c = *s); ++s, --n) {} + + if (n == 0) { + *h = *l = 0; + return; + } + + uint64_t hi = 0; + uint64_t lo = 0; + uint64_t hi_tmp; + uint64_t lo_tmp; + uint64_t lo_tmp2; + for (; n != 0 && isdigit(c = *s); ++s, --n) { + hi_tmp = hi; + lo_tmp = lo; + + //value *= 10; + lo = lo_tmp << 1; + hi = (lo_tmp >> 63) + (hi_tmp << 1); + lo_tmp2 = lo; + lo += lo_tmp << 3; + hi += (lo_tmp >> 61) + (hi_tmp << 3) + (lo < lo_tmp2 ? 
1 : 0);
+
+    //value += c - '0';
+    lo_tmp = lo;
+    lo += c - '0';
+    hi += (lo < lo_tmp) ? 1 : 0;
+  }
+
+  *h = hi;
+  *l = lo;
+}
+
+const uint32_t IdGenerator::EMPTY_KEY(0);
+const uint32_t IdGenerator::DELETED_KEY(CASS_UINT32_MAX);
+
+Murmur3Partitioner::Token Murmur3Partitioner::from_string(const StringRef& str) {
+  return parse_int64(str.data(), str.size());
+}
+
+Murmur3Partitioner::Token Murmur3Partitioner::hash(const StringRef& str) {
+  return MurmurHash3_x64_128(str.data(), str.size(), 0);
+}
+
+RandomPartitioner::Token RandomPartitioner::from_string(const StringRef& str) {
+  Token token;
+  parse_int128(str.data(), str.size(), &token.hi, &token.lo);
+  return token;
+}
+
+RandomPartitioner::Token RandomPartitioner::hash(const StringRef& str) {
+  Md5 hash;
+  hash.update(reinterpret_cast<const uint8_t*>(str.data()), str.size());
+  Token token;
+  hash.final(&token.hi, &token.lo);
+  return token;
+}
+
+ByteOrderedPartitioner::Token ByteOrderedPartitioner::from_string(const StringRef& str) {
+  const uint8_t* data = reinterpret_cast<const uint8_t*>(str.data());
+  return Token(data, data + str.size());
+}
+
+ByteOrderedPartitioner::Token ByteOrderedPartitioner::hash(const StringRef& str) {
+  const uint8_t* data = reinterpret_cast<const uint8_t*>(str.data());
+  return Token(data, data + str.size());
+}
+
+} // namespace cass
diff --git a/src/token_map_impl.hpp b/src/token_map_impl.hpp
new file mode 100644
index 000000000..21e34c9e7
--- /dev/null
+++ b/src/token_map_impl.hpp
@@ -0,0 +1,781 @@
+/*
+  Copyright (c) 2014-2016 DataStax
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#ifndef __CASS_TOKEN_MAP_IMPL_HPP_INCLUDED__
+#define __CASS_TOKEN_MAP_IMPL_HPP_INCLUDED__
+
+#include "collection_iterator.hpp"
+#include "constants.hpp"
+#include "map_iterator.hpp"
+#include "result_iterator.hpp"
+#include "result_response.hpp"
+#include "row.hpp"
+#include "string_ref.hpp"
+#include "token_map.hpp"
+#include "value.hpp"
+
+#include "third_party/rapidjson/rapidjson/document.h"
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#define CASS_NETWORK_TOPOLOGY_STRATEGY "NetworkTopologyStrategy"
+#define CASS_SIMPLE_STRATEGY "SimpleStrategy"
+
+namespace std {
+
+template<>
+struct equal_to<cass::Host::Ptr> {
+  bool operator()(const cass::Host::Ptr& lhs, const cass::Host::Ptr& rhs) const {
+    if (lhs == rhs) {
+      return true;
+    }
+    if (!lhs || !rhs) {
+      return false;
+    }
+    return lhs->address() == rhs->address();
+  }
+};
+
+} // namespace std
+
+namespace cass {
+
+struct HostHash {
+  std::size_t operator()(const cass::Host::Ptr& host) const {
+    if (!host) return 0;
+    return hash(host->address());
+  }
+  AddressHash hash;
+};
+
+class IdGenerator {
+public:
+  typedef sparsehash::dense_hash_map<std::string, uint32_t> IdMap;
+
+  static const uint32_t EMPTY_KEY;
+  static const uint32_t DELETED_KEY;
+
+  IdGenerator() {
+    ids_.set_empty_key(std::string());
+  }
+
+  uint32_t get(const std::string& key) {
+    if (key.empty()) {
+      return 0;
+    }
+
+    IdMap::const_iterator i = ids_.find(key);
+    if (i != ids_.end()) {
+      return i->second;
+    }
+
+    // This will never generate a 0 identifier. So 0 can be used as
+    // invalid or empty.
+    uint32_t id = ids_.size() + 1;
+    ids_[key] = id;
+    return id;
+  }
+
+private:
+  IdMap ids_;
+};
+
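A note on the sentinel keys used throughout this header: sparsehash's `dense_hash_map`/`dense_hash_set` require a reserved "empty" key to be set before any insert (and a "deleted" key before any erase), and neither sentinel may ever be used as a real key. That is why `IdGenerator` reserves the empty string here, never hands out id 0, and exposes `EMPTY_KEY`/`DELETED_KEY` sentinels for the id-keyed containers below. A minimal, self-contained sketch of that contract (not part of the patch; it assumes the bundled copy's `sparsehash` namespace and `<sparsehash/dense_hash_map>` header layout used by this PR):

```cpp
#include <iostream>
#include <string>

#include <sparsehash/dense_hash_map>

int main() {
  sparsehash::dense_hash_map<std::string, int> ids;

  // Mandatory before the first insert; the empty string can then never be
  // inserted as a real key (which is exactly IdGenerator's invariant).
  ids.set_empty_key(std::string());

  ids["dc1"] = 1;   // first datacenter gets id 1
  ids["rack1"] = 2; // first rack gets id 2

  std::cout << ids["dc1"] << "\n"; // prints 1; 0 stays free as "invalid"
  return 0;
}
```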
So 0 can be used as + // inalid or empty. + uint32_t id = ids_.size() + 1; + ids_[key] = id; + return id; + } + +private: + IdMap ids_; +}; + +struct Murmur3Partitioner { + typedef int64_t Token; + + static Token from_string(const StringRef& str); + static Token hash(const StringRef& str); + static StringRef name() { return "Murmur3Partitioner"; } +}; + +struct RandomPartitioner { + struct Token { + uint64_t hi; + uint64_t lo; + + bool operator<(const Token& other) const { + return hi == other.hi ? lo < other.lo : hi < other.hi; + } + + bool operator==(const Token& other) const { + return hi == other.hi && lo == other.lo; + } + }; + + static Token from_string(const StringRef& str); + static Token hash(const StringRef& str); + static StringRef name() { return "RandomPartitioner"; } +}; + +class ByteOrderedPartitioner { +public: + typedef std::vector Token; + + static Token from_string(const StringRef& str); + static Token hash(const StringRef& str); + static StringRef name() { return "ByteOrderedPartitioner"; } +}; + +class HostSet : public sparsehash::dense_hash_set { +public: + HostSet() { + set_empty_key(Host::Ptr(new Host(Address::EMPTY_KEY, false))); + set_deleted_key(Host::Ptr(new Host(Address::DELETED_KEY, false))); + } +}; + +class RackSet : public sparsehash::dense_hash_set { +public: + RackSet() { + set_empty_key(IdGenerator::EMPTY_KEY); + set_deleted_key(IdGenerator::DELETED_KEY); + } +}; + +struct Datacenter { + Datacenter() + : num_nodes(0) { } + size_t num_nodes; + RackSet racks; +}; + +class DatacenterMap : public sparsehash::dense_hash_map { +public: + DatacenterMap() { + set_empty_key(IdGenerator::EMPTY_KEY); + set_deleted_key(IdGenerator::DELETED_KEY); + } +}; + +struct ReplicationFactor { + ReplicationFactor() + : count(0) { } + size_t count; + std::string name; // Used for logging the datacenter name + bool operator==(const ReplicationFactor& other) const { + return count == other.count && name == other.name; + } +}; + +inline void build_datacenters(const HostSet& hosts, DatacenterMap& result) { + result.clear(); + for (HostSet::const_iterator i = hosts.begin(), end = hosts.end(); + i != end; ++i) { + uint32_t dc = (*i)->dc_id(); + uint32_t rack = (*i)->rack_id(); + if (dc != 0 && rack != 0) { + Datacenter& datacenter = result[dc]; + datacenter.racks.insert(rack); + datacenter.num_nodes++; + } + } +} + +class ReplicationFactorMap : public sparsehash::dense_hash_map { +public: + ReplicationFactorMap() { + set_empty_key(IdGenerator::EMPTY_KEY); + } +}; + +template +class ReplicationStrategy { +public: + typedef typename Partitioner::Token Token; + + typedef std::pair TokenHost; + typedef std::vector TokenHostVec; + + typedef std::pair TokenReplicas; + typedef std::vector TokenReplicasVec; + + typedef std::deque TokenHostQueue; + + struct DatacenterRackInfo { + DatacenterRackInfo() + : replica_count(0) + , replication_factor(0) + , rack_count(0) { } + size_t replica_count; + size_t replication_factor; + RackSet racks_observed; + size_t rack_count; + TokenHostQueue skipped_endpoints; + }; + + class DatacenterRackInfoMap : public sparsehash::dense_hash_map { + public: + DatacenterRackInfoMap () { + sparsehash::dense_hash_map::set_empty_key(IdGenerator::EMPTY_KEY); + } + }; + + enum Type { + NETWORK_TOPOLOGY_STRATEGY, + SIMPLE_STRATEGY, + NON_REPLICATED + }; + + ReplicationStrategy() + : type_(NON_REPLICATED) { } + + void init(IdGenerator& dc_ids, + const VersionNumber& cassandra_version, + const Row* row); + + bool operator!=(const ReplicationStrategy& other) const { + return 
+    return type_ != other.type_ ||
+           replication_factors_ != other.replication_factors_;
+  }
+
+  void build_replicas(const TokenHostVec& tokens, const DatacenterMap& datacenters,
+                      TokenReplicasVec& result) const;
+
+private:
+  void build_replicas_network_topology(const TokenHostVec& tokens, const DatacenterMap& datacenters,
+                                       TokenReplicasVec& result) const;
+  void build_replicas_simple(const TokenHostVec& tokens, const DatacenterMap& datacenters,
+                             TokenReplicasVec& result) const;
+  void build_replicas_non_replicated(const TokenHostVec& tokens, const DatacenterMap& datacenters,
+                                     TokenReplicasVec& result) const;
+
+private:
+  Type type_;
+  ReplicationFactorMap replication_factors_;
+};
+
+template <class Partitioner>
+void ReplicationStrategy<Partitioner>::init(IdGenerator& dc_ids,
+                                            const VersionNumber& cassandra_version,
+                                            const Row* row) {
+  StringRef strategy_class;
+
+  if (cassandra_version >= VersionNumber(3, 0, 0)) {
+    const Value* value = row->get_by_name("replication");
+    if (value && value->is_map() &&
+        is_string_type(value->primary_value_type()) &&
+        is_string_type(value->secondary_value_type())) {
+      MapIterator iterator(value);
+      while (iterator.next()) {
+        std::string key(iterator.key()->to_string());
+        if (key == "class") {
+          strategy_class = iterator.value()->to_string_ref();
+        } else {
+          std::string value(iterator.value()->to_string());
+          size_t count = strtoul(value.c_str(), NULL, 10);
+          if (count > 0) {
+            ReplicationFactor replication_factor;
+            replication_factor.count = count;
+            replication_factor.name = key;
+            if (key == "replication_factor") {
+              replication_factors_[1] = replication_factor;
+            } else {
+              replication_factors_[dc_ids.get(key)] = replication_factor;
+            }
+          } else {
+            LOG_WARN("Replication factor of 0 for option %s", key.c_str());
+          }
+        }
+      }
+    }
+  } else {
+    const Value* value;
+    value = row->get_by_name("strategy_class");
+    if (value && is_string_type(value->value_type())) {
+      strategy_class = value->to_string_ref();
+    }
+
+    value = row->get_by_name("strategy_options");
+
+    int32_t buffer_size = value->size();
+    ScopedPtr<char[]> buf(new char[buffer_size + 1]);
+    memcpy(buf.get(), value->data(), buffer_size);
+    buf[buffer_size] = '\0';
+
+    rapidjson::Document d;
+    d.ParseInsitu(buf.get());
+
+    if (!d.HasParseError() && d.IsObject()) {
+      for (rapidjson::Value::ConstMemberIterator i = d.MemberBegin(); i != d.MemberEnd(); ++i) {
+        std::string key(i->name.GetString(), i->name.GetStringLength());
+        std::string value(i->value.GetString(), i->value.GetStringLength());
+        size_t count = strtoul(value.c_str(), NULL, 10);
+        if (count > 0) {
+          ReplicationFactor replication_factor;
+          replication_factor.count = count;
+          replication_factor.name = key;
+          if (key == "replication_factor") {
+            replication_factors_[1] = replication_factor;
+          } else {
+            replication_factors_[dc_ids.get(key)] = replication_factor;
+          }
+        } else {
+          LOG_WARN("Replication factor of 0 for option %s", key.c_str());
+        }
+      }
+    }
+  }
+
+  if (ends_with(strategy_class, CASS_NETWORK_TOPOLOGY_STRATEGY)) {
+    type_ = NETWORK_TOPOLOGY_STRATEGY;
+  } else if (ends_with(strategy_class, CASS_SIMPLE_STRATEGY)) {
+    type_ = SIMPLE_STRATEGY;
+  }
+}
+
+template <class Partitioner>
+void ReplicationStrategy<Partitioner>::build_replicas(const TokenHostVec& tokens, const DatacenterMap& datacenters,
+                                                      TokenReplicasVec& result) const {
+  result.clear();
+  result.reserve(tokens.size());
+
+  switch (type_) {
+    case NETWORK_TOPOLOGY_STRATEGY:
+      build_replicas_network_topology(tokens, datacenters, result);
+      break;
+    case SIMPLE_STRATEGY:
+      build_replicas_simple(tokens, datacenters, result);
+      break;
+    default:
+      build_replicas_non_replicated(tokens, datacenters, result);
+      break;
+  }
+}
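For intuition about what these builders produce, here is a minimal, self-contained sketch (not part of the patch; `Ring`, `TokenOwner`, and the host names are invented for illustration) of the SimpleStrategy walk performed by `build_replicas_simple`: for each token, collect the owners of the next `rf` ring positions, including its own, wrapping past the last token.

```cpp
#include <iostream>
#include <stdint.h>
#include <string>
#include <utility>
#include <vector>

// Toy stand-ins for the driver's types: a "ring" is a sorted vector of
// (token, owner) pairs, analogous to TokenHostVec with strings as hosts.
typedef std::pair<int64_t, std::string> TokenOwner;
typedef std::vector<TokenOwner> Ring;

// SimpleStrategy: the replicas for a token are the owners of the next
// rf ring positions (starting with its own), wrapping around the end.
std::vector<std::string> simple_replicas(const Ring& ring, size_t index, size_t rf) {
  std::vector<std::string> replicas;
  for (size_t i = 0; replicas.size() < rf && i < ring.size(); ++i) {
    replicas.push_back(ring[(index + i) % ring.size()].second);
  }
  return replicas;
}

int main() {
  Ring ring;
  ring.push_back(TokenOwner(-100, "A"));
  ring.push_back(TokenOwner(0, "B"));
  ring.push_back(TokenOwner(100, "C"));

  // With rf = 2: token -100 -> {A, B}, 0 -> {B, C}, 100 -> {C, A}.
  for (size_t i = 0; i < ring.size(); ++i) {
    std::vector<std::string> replicas = simple_replicas(ring, i, 2);
    std::cout << ring[i].first << " -> " << replicas[0] << ", " << replicas[1] << "\n";
  }
  return 0;
}
```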
+
+template <class Partitioner>
+void ReplicationStrategy<Partitioner>::build_replicas_network_topology(const TokenHostVec& tokens, const DatacenterMap& datacenters,
+                                                                       TokenReplicasVec& result) const {
+  if (replication_factors_.empty()) {
+    return;
+  }
+
+  DatacenterRackInfoMap dc_racks;
+  dc_racks.resize(datacenters.size());
+
+  size_t num_replicas = 0;
+
+  // Populate the datacenter and rack information. Only consider valid
+  // datacenters that actually have hosts. If there's a replication factor
+  // for a datacenter that doesn't exist or has no nodes then it will not
+  // be counted.
+  for (ReplicationFactorMap::const_iterator i = replication_factors_.begin(),
+       end = replication_factors_.end(); i != end; ++i) {
+    DatacenterMap::const_iterator j = datacenters.find(i->first);
+    // Don't include datacenters that don't exist
+    if (j != datacenters.end()) {
+      // A replication factor cannot exceed the number of nodes in a datacenter
+      size_t replication_factor = std::min(i->second.count, j->second.num_nodes);
+      num_replicas += replication_factor;
+      DatacenterRackInfo dc_rack_info;
+      dc_rack_info.replication_factor = replication_factor;
+      dc_rack_info.rack_count = j->second.racks.size();
+      dc_racks[j->first] = dc_rack_info;
+    } else {
+      LOG_WARN("No nodes in datacenter '%s'. Check your replication strategies.", i->second.name.c_str());
+    }
+  }
+
+  if (num_replicas == 0) {
+    return;
+  }
+
+  for (typename TokenHostVec::const_iterator i = tokens.begin(),
+       end = tokens.end(); i != end; ++i) {
+    Token token = i->first;
+    typename TokenHostVec::const_iterator token_it = i;
+
+    CopyOnWriteHostVec replicas(new HostVec());
+    replicas->reserve(num_replicas);
+
+    // Clear datacenter and rack information for the next token
+    for (typename DatacenterRackInfoMap::iterator j = dc_racks.begin(),
+         end = dc_racks.end(); j != end; ++j) {
+      j->second.replica_count = 0;
+      j->second.racks_observed.clear();
+      j->second.skipped_endpoints.clear();
+    }
+
+    for (typename TokenHostVec::const_iterator j = tokens.begin(),
+         end = tokens.end(); j != end && replicas->size() < num_replicas; ++j) {
+      typename TokenHostVec::const_iterator curr_token_it = token_it;
+      Host* host = curr_token_it->second;
+      uint32_t dc = host->dc_id();
+      uint32_t rack = host->rack_id();
+
+      ++token_it;
+      if (token_it == tokens.end()) {
+        token_it = tokens.begin();
+      }
+
+      typename DatacenterRackInfoMap::iterator dc_rack_it = dc_racks.find(dc);
+      if (dc_rack_it == dc_racks.end()) {
+        continue;
+      }
+
+      DatacenterRackInfo& dc_rack_info = dc_rack_it->second;
+
+      size_t& replica_count_this_dc = dc_rack_info.replica_count;
+      const size_t replication_factor = dc_rack_info.replication_factor;
+
+      if (replica_count_this_dc >= replication_factor) {
+        continue;
+      }
+
+      RackSet& racks_observed_this_dc = dc_rack_info.racks_observed;
+      const size_t rack_count_this_dc = dc_rack_info.rack_count;
+
+      // First, attempt to distribute replicas over all possible racks in a
+      // datacenter; only then consider hosts in the same rack
+
+      if (rack == 0 || racks_observed_this_dc.size() == rack_count_this_dc) {
+        ++replica_count_this_dc;
+        replicas->push_back(Host::Ptr(host));
+      } else {
+        TokenHostQueue& skipped_endpoints_this_dc = dc_rack_info.skipped_endpoints;
+        if (racks_observed_this_dc.count(rack) > 0) {
+          skipped_endpoints_this_dc.push_back(curr_token_it);
+        } else {
+          ++replica_count_this_dc;
+          replicas->push_back(Host::Ptr(host));
+          racks_observed_this_dc.insert(rack);
+
+          // Once we've visited every rack in the current datacenter, start
+          // considering hosts we've already skipped.
+          if (racks_observed_this_dc.size() == rack_count_this_dc) {
+            while (!skipped_endpoints_this_dc.empty() && replica_count_this_dc < replication_factor) {
+              ++replica_count_this_dc;
+              replicas->push_back(Host::Ptr(skipped_endpoints_this_dc.front()->second));
+              skipped_endpoints_this_dc.pop_front();
+            }
+          }
+        }
+      }
+    }
+
+    result.push_back(TokenReplicas(token, replicas));
+  }
+}
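The rack-aware pass above is easier to see on a toy ring. The sketch below is illustrative only (`ToyHost` and `nts_replicas` are invented names; it covers a single datacenter and omits the per-DC replication-factor bookkeeping) and reproduces the core rule: take hosts from not-yet-observed racks first, defer same-rack hosts to a queue, and drain that queue once every rack has been seen.

```cpp
#include <deque>
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Each ring entry is a host with a rack; we want rf replicas for one token,
// preferring one host per rack before reusing a rack (the skipped queue).
struct ToyHost { std::string name; int rack; };

std::vector<ToyHost> nts_replicas(const std::vector<ToyHost>& ring, size_t rf, size_t rack_count) {
  std::vector<ToyHost> replicas;
  std::set<int> racks_observed;
  std::deque<ToyHost> skipped;
  for (size_t i = 0; i < ring.size() && replicas.size() < rf; ++i) {
    const ToyHost& host = ring[i];
    if (racks_observed.size() == rack_count) {
      replicas.push_back(host); // every rack covered; take hosts in ring order
    } else if (racks_observed.count(host.rack)) {
      skipped.push_back(host);  // rack already represented; defer this host
    } else {
      replicas.push_back(host);
      racks_observed.insert(host.rack);
      if (racks_observed.size() == rack_count) {
        // All racks covered: drain deferred hosts in the order they appeared.
        while (!skipped.empty() && replicas.size() < rf) {
          replicas.push_back(skipped.front());
          skipped.pop_front();
        }
      }
    }
  }
  return replicas;
}

int main() {
  // Ring order: A(r1), B(r1), C(r2). With rf = 3 the result is A, C, B:
  // B is initially skipped because rack r1 is already represented by A.
  std::vector<ToyHost> ring;
  ToyHost a = { "A", 1 }, b = { "B", 1 }, c = { "C", 2 };
  ring.push_back(a); ring.push_back(b); ring.push_back(c);

  std::vector<ToyHost> replicas = nts_replicas(ring, 3, 2);
  for (size_t i = 0; i < replicas.size(); ++i) {
    std::cout << replicas[i].name << "\n"; // prints A, C, B
  }
  return 0;
}
```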
+
+template <class Partitioner>
+void ReplicationStrategy<Partitioner>::build_replicas_simple(const TokenHostVec& tokens, const DatacenterMap& not_used,
+                                                             TokenReplicasVec& result) const {
+  ReplicationFactorMap::const_iterator it = replication_factors_.find(1);
+  if (it == replication_factors_.end()) {
+    return;
+  }
+  size_t num_replicas = std::min(it->second.count, tokens.size());
+  for (typename TokenHostVec::const_iterator i = tokens.begin(),
+       end = tokens.end(); i != end; ++i) {
+    CopyOnWriteHostVec replicas(new HostVec());
+    typename TokenHostVec::const_iterator token_it = i;
+    do {
+      replicas->push_back(Host::Ptr(token_it->second));
+      ++token_it;
+      if (token_it == tokens.end()) {
+        token_it = tokens.begin();
+      }
+    } while (replicas->size() < num_replicas);
+    result.push_back(TokenReplicas(i->first, replicas));
+  }
+}
+
+template <class Partitioner>
+void ReplicationStrategy<Partitioner>::build_replicas_non_replicated(const TokenHostVec& tokens, const DatacenterMap& not_used,
+                                                                     TokenReplicasVec& result) const {
+  for (typename TokenHostVec::const_iterator i = tokens.begin(); i != tokens.end(); ++i) {
+    CopyOnWriteHostVec replicas(new HostVec(1, Host::Ptr(i->second)));
+    result.push_back(TokenReplicas(i->first, replicas));
+  }
+}
+
+template <class Partitioner>
+class TokenMapImpl : public TokenMap {
+public:
+  typedef typename Partitioner::Token Token;
+
+  typedef std::pair<Token, Host*> TokenHost;
+  typedef std::vector<TokenHost> TokenHostVec;
+
+  struct TokenHostCompare {
+    bool operator()(const TokenHost& lhs, const TokenHost& rhs) const {
+      return lhs.first < rhs.first;
+    }
+  };
+
+  struct RemoveTokenHostIf {
+    RemoveTokenHostIf(const Host::Ptr& host)
+      : host(host) { }
+
+    bool operator()(const TokenHost& token) const {
+      if (!token.second) {
+        return false;
+      }
+      return token.second->address() == host->address();
+    }
+
+    const Host::Ptr& host;
+  };
+
+  typedef std::pair<Token, CopyOnWriteHostVec> TokenReplicas;
+  typedef std::vector<TokenReplicas> TokenReplicasVec;
+
+  struct TokenReplicasCompare {
+    bool operator()(const TokenReplicas& lhs, const TokenReplicas& rhs) const {
+      return lhs.first < rhs.first;
+    }
+  };
+
+  typedef sparsehash::dense_hash_map<std::string, TokenReplicasVec> KeyspaceReplicaMap;
+  typedef sparsehash::dense_hash_map<std::string, ReplicationStrategy<Partitioner> > KeyspaceStrategyMap;
+
+  static const CopyOnWriteHostVec NO_REPLICAS;
+
+  TokenMapImpl() {
+    replicas_.set_empty_key(std::string());
+    replicas_.set_deleted_key(std::string(1, '\0'));
+    strategies_.set_empty_key(std::string());
+    strategies_.set_deleted_key(std::string(1, '\0'));
+  }
+
+  virtual void add_host(const Host::Ptr& host, const Value* tokens);
+  virtual void update_host_and_build(const Host::Ptr& host, const Value* tokens);
+  virtual void remove_host_and_build(const Host::Ptr& host);
+  virtual void clear_tokens_and_hosts();
+
+  virtual void add_keyspaces(const VersionNumber& cassandra_version, ResultResponse* result);
+  virtual void update_keyspaces_and_build(const VersionNumber& cassandra_version, ResultResponse* result);
+  virtual void drop_keyspace(const std::string& keyspace_name);
+  virtual void clear_replicas_and_strategies();
+
+  virtual void build();
+
+  virtual const CopyOnWriteHostVec& get_replicas(const std::string& keyspace_name,
+                                                 const std::string& routing_key) const;
+
+private:
+  void update_keyspace(const VersionNumber& cassandra_version,
+                       ResultResponse* result,
+                       bool should_build_replicas);
+  void remote_host_tokens(const Host::Ptr& host);
+  void update_host_ids(const Host::Ptr& host);
+  void build_replicas();
+
+private:
+  TokenHostVec tokens_;
+  HostSet hosts_;
+  DatacenterMap datacenters_;
+  KeyspaceReplicaMap replicas_;
+  KeyspaceStrategyMap strategies_;
+  IdGenerator rack_ids_;
+  IdGenerator dc_ids_;
+};
+
+template <class Partitioner>
+const CopyOnWriteHostVec TokenMapImpl<Partitioner>::NO_REPLICAS(NULL);
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::add_host(const Host::Ptr& host, const Value* tokens) {
+  update_host_ids(host);
+  hosts_.insert(host);
+
+  CollectionIterator iterator(tokens);
+  while (iterator.next()) {
+    Token token = Partitioner::from_string(iterator.value()->to_string_ref());
+    tokens_.push_back(TokenHost(token, host.get()));
+  }
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::update_host_and_build(const Host::Ptr& host, const Value* tokens) {
+  uint64_t start = uv_hrtime();
+  remote_host_tokens(host);
+
+  update_host_ids(host);
+  hosts_.insert(host);
+
+  TokenHostVec new_tokens;
+  CollectionIterator iterator(tokens);
+  while (iterator.next()) {
+    Token token = Partitioner::from_string(iterator.value()->to_string_ref());
+    new_tokens.push_back(TokenHost(token, host.get()));
+  }
+
+  std::sort(new_tokens.begin(), new_tokens.end());
+
+  size_t previous_size = tokens_.size();
+  tokens_.resize(tokens_.size() + new_tokens.size());
+  std::merge(tokens_.begin(), tokens_.begin() + previous_size,
+             new_tokens.begin(), new_tokens.end(),
+             tokens_.begin(), TokenHostCompare());
+
+  build_replicas();
+  LOG_DEBUG("Updated token map with host %s (%u tokens). Rebuilt token map with %u hosts and %u tokens in %f ms",
+            host->address_string().c_str(),
+            (unsigned int)new_tokens.size(),
+            (unsigned int)hosts_.size(),
+            (unsigned int)tokens_.size(),
+            (double)(uv_hrtime() - start) / (1000.0 * 1000.0));
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::remove_host_and_build(const Host::Ptr& host) {
+  uint64_t start = uv_hrtime();
+  remote_host_tokens(host);
+  hosts_.erase(host);
+  build_replicas();
+  LOG_DEBUG("Removed host %s from token map. Rebuilt token map with %u hosts and %u tokens in %f ms",
+            host->address_string().c_str(),
+            (unsigned int)hosts_.size(),
+            (unsigned int)tokens_.size(),
+            (double)(uv_hrtime() - start) / (1000.0 * 1000.0));
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::clear_tokens_and_hosts() {
+  tokens_.clear();
+  hosts_.clear();
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::add_keyspaces(const VersionNumber& cassandra_version,
+                                              ResultResponse* result) {
+  update_keyspace(cassandra_version, result, false);
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::update_keyspaces_and_build(const VersionNumber& cassandra_version,
+                                                           ResultResponse* result) {
+  update_keyspace(cassandra_version, result, true);
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::drop_keyspace(const std::string& keyspace_name) {
+  replicas_.erase(keyspace_name);
+  strategies_.erase(keyspace_name);
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::clear_replicas_and_strategies() {
+  replicas_.clear();
+  strategies_.clear();
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::build() {
+  uint64_t start = uv_hrtime();
+  std::sort(tokens_.begin(), tokens_.end());
+  build_replicas();
+  LOG_DEBUG("Built token map with %u hosts and %u tokens in %f ms",
+            (unsigned int)hosts_.size(),
+            (unsigned int)tokens_.size(),
+            (double)(uv_hrtime() - start) / (1000.0 * 1000.0));
+}
+
+template <class Partitioner>
+const CopyOnWriteHostVec& TokenMapImpl<Partitioner>::get_replicas(const std::string& keyspace_name,
+                                                                  const std::string& routing_key) const {
+  typename KeyspaceReplicaMap::const_iterator ks_it = replicas_.find(keyspace_name);
+
+  if (ks_it != replicas_.end()) {
+    Token token = Partitioner::hash(routing_key);
+    const TokenReplicasVec& replicas = ks_it->second;
+    typename TokenReplicasVec::const_iterator replicas_it = std::upper_bound(replicas.begin(), replicas.end(),
+                                                                             TokenReplicas(token, NO_REPLICAS),
+                                                                             TokenReplicasCompare());
+    if (replicas_it != replicas.end()) {
+      return replicas_it->second;
+    } else if (!replicas.empty()) {
+      return replicas.front().second;
+    }
+  }
+
+  return NO_REPLICAS;
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::update_keyspace(const VersionNumber& cassandra_version,
+                                                ResultResponse* result,
+                                                bool should_build_replicas) {
+  ResultIterator rows(result);
+
+  while (rows.next()) {
+    std::string keyspace_name;
+    const Row* row = rows.row();
+
+    if (!row->get_string_by_name("keyspace_name", &keyspace_name)) {
+      LOG_ERROR("Unable to get column value for 'keyspace_name'");
+      continue;
+    }
+
+    ReplicationStrategy<Partitioner> strategy;
+
+    strategy.init(dc_ids_, cassandra_version, row);
+
+    typename KeyspaceStrategyMap::iterator i = strategies_.find(keyspace_name);
+    if (i == strategies_.end() || i->second != strategy) {
+      if (i == strategies_.end()) {
+        strategies_[keyspace_name] = strategy;
+      } else {
+        i->second = strategy;
+      }
+      if (should_build_replicas) {
+        uint64_t start = uv_hrtime();
+        build_datacenters(hosts_, datacenters_);
+        strategy.build_replicas(tokens_, datacenters_, replicas_[keyspace_name]);
+        LOG_DEBUG("Updated token map with keyspace '%s'. Rebuilt token map with %u hosts and %u tokens in %f ms",
+                  keyspace_name.c_str(),
+                  (unsigned int)hosts_.size(),
+                  (unsigned int)tokens_.size(),
+                  (double)(uv_hrtime() - start) / (1000.0 * 1000.0));
+      }
+    }
+  }
+}
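`get_replicas()` above resolves a routing key with a binary search over a sorted vector instead of an ordered map: hash the key, `std::upper_bound` into the `(token, replicas)` vector, and wrap to the front when the hash falls past the last token. A hedged, standalone illustration of that lookup rule (the `Entry`/`lookup` names are invented; the comparator mirrors `TokenReplicasCompare` by comparing tokens only):

```cpp
#include <algorithm>
#include <iostream>
#include <stdint.h>
#include <string>
#include <utility>
#include <vector>

// Sorted (token, replica-set-name) pairs standing in for TokenReplicasVec.
typedef std::pair<int64_t, std::string> Entry;

struct TokenCompare {
  bool operator()(const Entry& lhs, const Entry& rhs) const {
    return lhs.first < rhs.first; // compare tokens only, never the payload
  }
};

// The owner of a hashed key is the first entry with a token strictly
// greater than the hash; past the last token, wrap to the front of the ring.
const std::string& lookup(const std::vector<Entry>& ring, int64_t hash) {
  std::vector<Entry>::const_iterator it =
      std::upper_bound(ring.begin(), ring.end(),
                       Entry(hash, std::string()), TokenCompare());
  if (it == ring.end()) {
    it = ring.begin();
  }
  return it->second;
}

int main() {
  std::vector<Entry> ring;
  ring.push_back(Entry(-100, "replicas@-100"));
  ring.push_back(Entry(0, "replicas@0"));
  ring.push_back(Entry(100, "replicas@100"));

  std::cout << lookup(ring, -150) << "\n"; // replicas@-100
  std::cout << lookup(ring, -100) << "\n"; // replicas@0 (strictly greater)
  std::cout << lookup(ring, 50) << "\n";   // replicas@100
  std::cout << lookup(ring, 500) << "\n";  // wraps to replicas@-100
  return 0;
}
```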
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::remote_host_tokens(const Host::Ptr& host) {
+  typename TokenHostVec::iterator last = std::remove_copy_if(tokens_.begin(), tokens_.end(),
+                                                             tokens_.begin(),
+                                                             RemoveTokenHostIf(host));
+  tokens_.resize(last - tokens_.begin());
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::update_host_ids(const Host::Ptr& host) {
+  host->set_rack_and_dc_ids(rack_ids_.get(host->rack()), dc_ids_.get(host->dc()));
+}
+
+template <class Partitioner>
+void TokenMapImpl<Partitioner>::build_replicas() {
+  build_datacenters(hosts_, datacenters_);
+  for (typename KeyspaceStrategyMap::const_iterator i = strategies_.begin(),
+       end = strategies_.end();
+       i != end; ++i) {
+    const std::string& keyspace_name = i->first;
+    const ReplicationStrategy<Partitioner>& strategy = i->second;
+    strategy.build_replicas(tokens_, datacenters_, replicas_[keyspace_name]);
+  }
+}
+
+} // namespace cass
+
+#endif
diff --git a/test/ccm_bridge/src/bridge.cpp b/test/ccm_bridge/src/bridge.cpp
index a2b05c3a7..b8252301f 100644
--- a/test/ccm_bridge/src/bridge.cpp
+++ b/test/ccm_bridge/src/bridge.cpp
@@ -517,12 +517,13 @@ ClusterStatus CCM::Bridge::cluster_status() {
   return status;
 }
 
-bool CCM::Bridge::create_cluster(unsigned short data_center_one_nodes /*= 1*/,
-                                 unsigned short data_center_two_node /*= 0*/,
-                                 bool is_ssl /* = false */, bool is_client_authentication /* = false */) {
+bool CCM::Bridge::create_cluster(std::vector<unsigned short> data_center_nodes,
+                                 bool with_vnodes /*= false*/, bool is_ssl /*= false*/,
+                                 bool is_client_authentication /*= false*/) {
   // Generate the cluster name and determine if it needs to be created
   std::string active_cluster_name = get_active_cluster();
-  std::string cluster_name = generate_cluster_name(cassandra_version_, data_center_one_nodes, data_center_two_node, is_ssl, is_client_authentication);
+  std::string cluster_name = generate_cluster_name(cassandra_version_, data_center_nodes,
+                                                   with_vnodes, is_ssl, is_client_authentication);
   if (use_dse_ && dse_workload_ != DSE_WORKLOAD_CASSANDRA) {
     cluster_name.append("-").append(dse_workloads_[dse_workload_]);
   }
@@ -582,7 +583,7 @@ bool CCM::Bridge::create_cluster(unsigned short data_center_one_nodes /*= 1*/,
   execute_ccm_command(generate_create_updateconf_command(cassandra_version_));
 
   // Create the cluster populate command and execute
-  std::string cluster_nodes = generate_cluster_nodes(data_center_one_nodes, data_center_two_node);
+  std::string cluster_nodes = generate_cluster_nodes(data_center_nodes);
   std::string cluster_ip_prefix = get_ip_prefix();
   std::vector<std::string> populate_command;
   populate_command.push_back("populate");
@@ -590,8 +591,17 @@
   populate_command.push_back(cluster_nodes);
   populate_command.push_back("-i");
   populate_command.push_back(cluster_ip_prefix);
+  if (with_vnodes) {
+    populate_command.push_back("--vnodes");
+  }
   execute_ccm_command(populate_command);
 
+  // Update the cluster configuration (set num_tokens)
+  if (with_vnodes) {
+    // Maximum number of tokens is 1536
+    update_cluster_configuration("num_tokens", "1536");
+  }
+
   // Set the DSE workload (if applicable)
   if (use_dse_ && dse_workload_ != DSE_WORKLOAD_CASSANDRA) {
     set_dse_workload(dse_workload_);
@@ -602,6 +612,18 @@
   return !(active_cluster_name.compare(cluster_name) == 0);
 }
 
+bool CCM::Bridge::create_cluster(unsigned short data_center_one_nodes /*= 1*/,
+                                 unsigned short data_center_two_nodes /*= 0*/, bool with_vnodes /*= false*/,
+                                 bool is_ssl /*= false*/, bool is_client_authentication /*= false*/) {
+  // Create the data center nodes from the two data centers
+  std::vector<unsigned short> data_center_nodes;
+  data_center_nodes.push_back(data_center_one_nodes);
+  data_center_nodes.push_back(data_center_two_nodes);
+
+  return create_cluster(data_center_nodes, with_vnodes, is_ssl,
+                        is_client_authentication);
+}
+
 bool CCM::Bridge::is_cluster_down() {
   // Iterate over each node and ensure a connection cannot be made
   ClusterStatus status = cluster_status();
@@ -752,6 +774,26 @@ void CCM::Bridge::update_cluster_configuration(const std::string& key, const std
   execute_ccm_command(updateconf_command);
 }
 
+void CCM::Bridge::update_node_configuration(unsigned int node, std::vector<std::string> key_value_pairs) {
+  // Create the update configuration command
+  key_value_pairs.insert(key_value_pairs.begin(), generate_node_name(node));
+  key_value_pairs.insert(key_value_pairs.begin(), "updateconf");
+  execute_ccm_command(key_value_pairs);
+}
+
+void CCM::Bridge::update_node_configuration(unsigned int node, const std::string& key, const std::string& value) {
+  // Create the configuration to be updated
+  std::stringstream configuration;
+  configuration << key << ":" << value;
+
+  // Create the update configuration command
+  std::vector<std::string> updateconf_command;
+  updateconf_command.push_back(generate_node_name(node));
+  updateconf_command.push_back("updateconf");
+  updateconf_command.push_back(configuration.str());
+  execute_ccm_command(updateconf_command);
+}
+
 unsigned int CCM::Bridge::add_node(const std::string& data_center /*= ""*/) {
   // Generate the arguments for the add node command
   unsigned int node = get_next_available_node();
@@ -1563,13 +1605,15 @@ std::vector<std::string> CCM::Bridge::get_available_clusters(std::string& active
 }
 
 std::string CCM::Bridge::generate_cluster_name(CassVersion cassandra_version,
-                                               unsigned short data_center_one_nodes,
-                                               unsigned short data_center_two_nodes,
-                                               bool is_ssl, bool is_client_authentication) {
+                                               std::vector<unsigned short> data_center_nodes,
+                                               bool with_vnodes, bool is_ssl, bool is_client_authentication) {
   std::stringstream cluster_name;
   cluster_name << cluster_prefix_ << "_" << (use_dse_ ? dse_version_.to_string(false) : cassandra_version.to_string(false))
-               << "_" << data_center_one_nodes << "-" << data_center_two_nodes;
+               << "_" << generate_cluster_nodes(data_center_nodes, '-');
+  if (with_vnodes) {
+    cluster_name << "-vnodes";
+  }
   if (is_ssl) {
     cluster_name << "-ssl";
     if (is_client_authentication) {
@@ -1579,9 +1623,16 @@ std::string CCM::Bridge::generate_cluster_name(CassVersion cassandra_version,
   return cluster_name.str();
 }
 
-std::string CCM::Bridge::generate_cluster_nodes(unsigned short data_center_one_nodes, unsigned short data_center_two_nodes) {
+std::string CCM::Bridge::generate_cluster_nodes(std::vector<unsigned short> data_center_nodes,
+                                                char separator /* = ':'*/) {
   std::stringstream cluster_nodes;
-  cluster_nodes << data_center_one_nodes << ":" << data_center_two_nodes;
+  for (std::vector<unsigned short>::iterator iterator = data_center_nodes.begin();
+       iterator != data_center_nodes.end(); ++iterator) {
+    cluster_nodes << *iterator;
+    if ((iterator + 1) != data_center_nodes.end()) {
+      cluster_nodes << separator;
+    }
+  }
   return cluster_nodes.str();
 }
 
diff --git a/test/ccm_bridge/src/bridge.hpp b/test/ccm_bridge/src/bridge.hpp
index 391bd08b0..dbf0e1bae 100644
--- a/test/ccm_bridge/src/bridge.hpp
+++ b/test/ccm_bridge/src/bridge.hpp
@@ -240,20 +240,41 @@ namespace CCM {
      */
     ClusterStatus cluster_status();
 
+    /**
+     * Create a Cassandra cluster with nodes in multiple data centers
+     *
+     * @param data_center_nodes Vector of data center nodes
+     * @param with_vnodes True if vnodes tokens should be used; false otherwise
+     *                    (default: false)
+     * @param is_ssl True if SSL should be enabled; false otherwise
+     *               (default: false)
+     * @param is_client_authentication True if client authentication should be
+     *                                 enabled; false otherwise (default: false)
+     * @return True if cluster was created or switched; false otherwise
+     * @throws BridgeException
+     */
+    bool create_cluster(std::vector<unsigned short> data_center_nodes,
+                        bool with_vnodes = false, bool is_ssl = false,
+                        bool is_client_authentication = false);
+
     /**
      * Create a Cassandra cluster
      *
      * @param data_center_one_nodes Number of nodes for DC1 (default: 1)
      * @param data_center_two_nodes Number of nodes for DC2 (default: 0)
+     * @param with_vnodes True if vnodes tokens should be used; false otherwise
+     *                    (default: false)
      * @param is_ssl True if SSL should be enabled; false otherwise
      *               (default: false)
      * @param is_client_authentication True if client authentication should be
      *                                 enabled; false otherwise (default: false)
      * @return True if cluster was created or switched; false otherwise
+     * @deprecated Clusters with more than two data centers may be needed; this
+     *             overload will be removed after the test harness is refactored
      */
-    bool create_cluster(unsigned short data_center_one_nodes = 1,
-                        unsigned short data_center_two_node = 0,
-                        bool is_ssl = false, bool is_client_authentication = false);
+    bool CCM_BRIDGE_DEPRECATED(create_cluster(unsigned short data_center_one_nodes = 1,
+                               unsigned short data_center_two_nodes = 0, bool with_vnodes = false,
+                               bool is_ssl = false, bool is_client_authentication = false));
 
     /**
      * Check to see if the active cluster is no longer accepting connections
@@ -355,6 +376,23 @@
      */
     void update_cluster_configuration(const std::string& key, const std::string& value, bool is_dse = false);
 
+    /**
+     * Update the node configuration
+     *
+     * @param node Node to update configuration on
+     * @param key_value_pairs Key:Value to update
+     */
+    void update_node_configuration(unsigned int node, std::vector<std::string> key_value_pairs);
+
+    /**
+     * Update the node configuration
+     *
+     * @param node Node to update configuration on
+     * @param key Key to update
+     * @param value Value to apply to key configuration
+     */
+    void update_node_configuration(unsigned int node, const std::string& key, const std::string& value);
+
     /**
      * Add a node on the active Cassandra cluster
      *
@@ -784,25 +822,27 @@
      * in each data center
      *
      * @param cassandra_version Cassandra version being used
-     * @param data_center_one_nodes Number of nodes for DC1
-     * @param data_center_two_nodes Number of nodes for DC2
+     * @param data_center_nodes Vector of nodes for each data center
+     * @param with_vnodes True if vnodes are enabled; false otherwise
      * @param is_ssl True if SSL is enabled; false otherwise
-     * @param is_client_authentiction True if client authentication is enabled;
+     * @param is_client_authentication True if client authentication is enabled;
      *                                false otherwise
+     * @return Cluster name
      */
     std::string generate_cluster_name(CassVersion cassandra_version,
-                                      unsigned short data_center_one_nodes,
-                                      unsigned short data_center_two_nodes,
-                                      bool is_ssl, bool is_client_authentication);
+                                      std::vector<unsigned short> data_center_nodes,
+                                      bool with_vnodes, bool is_ssl, bool is_client_authentication);
 
     /**
      * Generate the nodes parameter for the Cassandra cluster based on the number
      * of nodes in each data center
      *
-     * @param data_center_one_nodes Number of nodes for DC1
-     * #param data_center_two_nodes Number of nodes for DC2
+     * @param data_center_nodes Vector of nodes for each data center
+     * @param separator Separator to use between cluster nodes
+     * @return String of nodes separated by separator
      */
-    std::string generate_cluster_nodes(unsigned short data_center_one_nodes, unsigned short data_center_two_nodes);
+    std::string generate_cluster_nodes(std::vector<unsigned short> data_center_nodes,
+                                       char separator = ':');
 
     /**
      * Generate the CCM update configuration command based on the Cassandra
diff --git a/test/integration_tests/src/test_authentication.cpp b/test/integration_tests/src/test_authentication.cpp
index 93f166bef..ce8b6de8e 100644
--- a/test/integration_tests/src/test_authentication.cpp
+++ b/test/integration_tests/src/test_authentication.cpp
@@ -36,7 +36,7 @@ struct AthenticationTests {
     ccm->kill_cluster();
     ccm->update_cluster_configuration("authenticator", "PasswordAuthenticator");
     ccm->start_cluster("-Dcassandra.superuser_setup_delay_ms=0");
-    test_utils::initialize_contact_points(cluster.get(), CCM::Bridge::get_ip_prefix("config.txt"), 1, 0);
+    test_utils::initialize_contact_points(cluster.get(), CCM::Bridge::get_ip_prefix("config.txt"), 1);
   }
 
   ~AthenticationTests() {
diff --git a/test/integration_tests/src/test_consistency.cpp b/test/integration_tests/src/test_consistency.cpp
index 1307e22c3..a0598d3c1 100644
--- a/test/integration_tests/src/test_consistency.cpp
+++ b/test/integration_tests/src/test_consistency.cpp
@@ -43,7 +43,7 @@ BOOST_AUTO_TEST_CASE(simple_two_nodes)
     ccm->start_cluster();
   }
 
-  test_utils::initialize_contact_points(cluster.get(), ip_prefix, 2, 0);
+  test_utils::initialize_contact_points(cluster.get(), ip_prefix, 2);
 
   test_utils::CassSessionPtr session(test_utils::create_session(cluster.get()));
@@ -99,7 +99,7 @@ BOOST_AUTO_TEST_CASE(one_node_down)
     ccm->start_cluster();
   }
 
-  test_utils::initialize_contact_points(cluster.get(), ip_prefix, 3, 0);
+  test_utils::initialize_contact_points(cluster.get(), ip_prefix, 3);
 
   test_utils::CassSessionPtr session(test_utils::create_session(cluster.get()));
@@ -153,7 +153,7 @@ BOOST_AUTO_TEST_CASE(two_nodes_down)
     ccm->start_cluster();
   }
 
-
test_utils::initialize_contact_points(cluster.get(), ip_prefix, 3, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 3); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); @@ -205,7 +205,7 @@ BOOST_AUTO_TEST_CASE(retry_policy_downgrading) ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 3, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 3); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); diff --git a/test/integration_tests/src/test_control_connection.cpp b/test/integration_tests/src/test_control_connection.cpp index cd1696691..8d2d83b8a 100644 --- a/test/integration_tests/src/test_control_connection.cpp +++ b/test/integration_tests/src/test_control_connection.cpp @@ -104,7 +104,7 @@ BOOST_AUTO_TEST_CASE(connect_invalid_port) ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); cass_cluster_set_port(cluster.get(), 9999); // Invalid port @@ -127,7 +127,7 @@ BOOST_AUTO_TEST_CASE(reconnection) // Ensure RR policy cass_cluster_set_load_balance_round_robin(cluster.get()); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); @@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(topology_change) // Ensure RR policy cass_cluster_set_load_balance_round_robin(cluster.get()); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); @@ -190,7 +190,7 @@ BOOST_AUTO_TEST_CASE(status_change) // Ensure RR policy cass_cluster_set_load_balance_round_robin(cluster.get());; - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); @@ -223,7 +223,7 @@ BOOST_AUTO_TEST_CASE(node_discovery) cass_cluster_set_load_balance_round_robin(cluster.get());; // Only add a single IP - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); @@ -251,7 +251,7 @@ BOOST_AUTO_TEST_CASE(node_discovery_invalid_ips) cass_cluster_set_contact_points(cluster.get(), "192.0.2.0,192.0.2.1,192.0.2.3"); // Only add a single valid IP - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); // Make sure the timeout is very high for the initial invalid IPs test_utils::CassSessionPtr session(test_utils::create_session(cluster.get(), NULL, 60 * test_utils::ONE_SECOND_IN_MICROS)); @@ -277,7 +277,7 @@ BOOST_AUTO_TEST_CASE(node_discovery_no_local_rows) cass_cluster_set_load_balance_round_robin(cluster.get());; // Only add a single valid IP - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); { test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); @@ -307,7 +307,7 @@ BOOST_AUTO_TEST_CASE(node_discovery_no_rpc_addresss) 
cass_cluster_set_load_balance_round_robin(cluster.get());; // Only add a single valid IP - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); // Make the 'rpc_address' null on all applicable hosts (1 and 2) { @@ -339,7 +339,7 @@ BOOST_AUTO_TEST_CASE(full_outage) ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); test_utils::execute_query(session.get(), query); @@ -372,7 +372,7 @@ BOOST_AUTO_TEST_CASE(node_decommission) ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 2, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 2); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); // Wait for all hosts to be added to the pool; timeout after 10 seconds diff --git a/test/integration_tests/src/test_custom_payload.cpp b/test/integration_tests/src/test_custom_payload.cpp index 9b7b263e0..4bc9782b8 100644 --- a/test/integration_tests/src/test_custom_payload.cpp +++ b/test/integration_tests/src/test_custom_payload.cpp @@ -38,7 +38,7 @@ BOOST_AUTO_TEST_CASE(simple) ccm->start_cluster("-Dcassandra.custom_query_handler_class=org.apache.cassandra.cql3.CustomPayloadMirroringQueryHandler"); test_utils::CassClusterPtr cluster(cass_cluster_new()); - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); diff --git a/test/integration_tests/src/test_latency_aware_policy.cpp b/test/integration_tests/src/test_latency_aware_policy.cpp index d5888b151..0e85b6a9f 100644 --- a/test/integration_tests/src/test_latency_aware_policy.cpp +++ b/test/integration_tests/src/test_latency_aware_policy.cpp @@ -47,7 +47,7 @@ struct LatencyAwarePolicyTest { cass_cluster_set_reconnect_wait_time(cluster_.get(), 1); cass_cluster_set_connect_timeout(cluster_.get(), 240 * test_utils::ONE_SECOND_IN_MICROS); cass_cluster_set_request_timeout(cluster_.get(), 240 * test_utils::ONE_SECOND_IN_MICROS); - test_utils::initialize_contact_points(cluster_.get(), ccm_->get_ip_prefix(), 3, 0); + test_utils::initialize_contact_points(cluster_.get(), ccm_->get_ip_prefix(), 3); cass_cluster_set_latency_aware_routing(cluster_.get(), cass_true); cass_cluster_set_latency_aware_routing_settings(cluster_.get(), 1e6, 1, 1, 1, 1); // Handle deprecated and removed protocol versions [CASSANDRA-10146] diff --git a/test/integration_tests/src/test_load_balancing.cpp b/test/integration_tests/src/test_load_balancing.cpp index 7b86ebe31..357f1a840 100644 --- a/test/integration_tests/src/test_load_balancing.cpp +++ b/test/integration_tests/src/test_load_balancing.cpp @@ -65,7 +65,7 @@ BOOST_AUTO_TEST_CASE(round_robin) cass_cluster_set_load_balance_round_robin(cluster.get()); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); wait_for_total_connections(session, 3); @@ -106,7 +106,7 @@ BOOST_AUTO_TEST_CASE(dc_aware) cass_cluster_set_load_balance_dc_aware(cluster.get(), "dc1", 1, cass_false); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 
1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); wait_for_total_connections(session, 3); @@ -152,7 +152,7 @@ BOOST_AUTO_TEST_CASE(blacklist) { cass_cluster_set_blacklist_filtering(cluster.get(), host2.c_str()); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); wait_for_total_connections(session, 1); @@ -205,7 +205,7 @@ BOOST_AUTO_TEST_CASE(blacklist_dc) { cass_cluster_set_blacklist_dc_filtering(cluster.get(), "dc2"); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); wait_for_total_connections(session, 2); @@ -228,7 +228,7 @@ BOOST_AUTO_TEST_CASE(blacklist_dc) { cass_cluster_set_blacklist_dc_filtering(cluster.get(), ""); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); wait_for_total_connections(session, 4); diff --git a/test/integration_tests/src/test_logging.cpp b/test/integration_tests/src/test_logging.cpp index c41f4bd4b..cdf345c74 100644 --- a/test/integration_tests/src/test_logging.cpp +++ b/test/integration_tests/src/test_logging.cpp @@ -47,7 +47,7 @@ BOOST_AUTO_TEST_CASE(logging_callback) { test_utils::CassClusterPtr cluster(cass_cluster_new()); - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); } @@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(logging_connection_error_reduced) test_utils::CassLog::set_expected_log_level(CASS_LOG_ERROR); test_utils::CassClusterPtr cluster(cass_cluster_new()); - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); test_utils::CassSessionPtr session(cass_session_new()); test_utils::CassFuturePtr connect_future(cass_session_connect(session.get(), cluster.get())); cass_future_error_code(connect_future.get()); @@ -127,7 +127,7 @@ BOOST_AUTO_TEST_CASE(logging_pool_error_reduced) cass_cluster_set_connection_heartbeat_interval(cluster.get(), 1); cass_cluster_set_connection_idle_timeout(cluster.get(), 1); cass_cluster_set_request_timeout(cluster.get(), 1000); - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); test_utils::CassSessionPtr session(cass_session_new()); ccm->start_cluster(); diff --git a/test/integration_tests/src/test_metrics.cpp b/test/integration_tests/src/test_metrics.cpp index 2f494cb1f..b7f992894 100644 --- a/test/integration_tests/src/test_metrics.cpp +++ b/test/integration_tests/src/test_metrics.cpp @@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(connections) { cass_cluster_set_num_threads_io(cluster_.get(), 1); cass_cluster_set_core_connections_per_host(cluster_.get(), 1); cass_cluster_set_reconnect_wait_time(cluster_.get(), 10); // Low re-connect for node restart - test_utils::initialize_contact_points(cluster_.get(), 
ccm_->get_ip_prefix(), 3, 0); + test_utils::initialize_contact_points(cluster_.get(), ccm_->get_ip_prefix(), 3); if (ccm_->create_cluster(3)) { ccm_->start_cluster(); } @@ -159,7 +159,7 @@ BOOST_AUTO_TEST_CASE(connections) { BOOST_AUTO_TEST_CASE(timeouts) { CassMetrics metrics; cass_cluster_set_core_connections_per_host(cluster_.get(), 2); - test_utils::initialize_contact_points(cluster_.get(), ccm_->get_ip_prefix(), 2, 0); + test_utils::initialize_contact_points(cluster_.get(), ccm_->get_ip_prefix(), 2); /* * Check for connection timeouts @@ -237,7 +237,7 @@ BOOST_AUTO_TEST_CASE(request_statistics) { //Create one connections per host cass_cluster_set_num_threads_io(cluster_.get(), 1); cass_cluster_set_core_connections_per_host(cluster_.get(), 1); - test_utils::initialize_contact_points(cluster_.get(), ccm_->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster_.get(), ccm_->get_ip_prefix(), 1); if (ccm_->create_cluster()) { ccm_->start_cluster(); } diff --git a/test/integration_tests/src/test_pool.cpp b/test/integration_tests/src/test_pool.cpp index f56ada8eb..09cd8f1c1 100644 --- a/test/integration_tests/src/test_pool.cpp +++ b/test/integration_tests/src/test_pool.cpp @@ -202,7 +202,7 @@ BOOST_AUTO_TEST_CASE(dont_recycle_pool_on_timeout) { ConnectionInterruptionData ci_data = { tester.ccm.get(), 2, 0, 0 }; std::string ip_prefix = tester.ccm->get_ip_prefix(); - test_utils::initialize_contact_points(tester.cluster, ip_prefix, 2, 0); + test_utils::initialize_contact_points(tester.cluster, ip_prefix, 2); cass_cluster_set_connect_timeout(tester.cluster, 1000); cass_cluster_set_num_threads_io(tester.cluster, 32); cass_cluster_set_core_connections_per_host(tester.cluster, 4); diff --git a/test/integration_tests/src/test_sessions.cpp b/test/integration_tests/src/test_sessions.cpp index 268c3dcc6..001397503 100644 --- a/test/integration_tests/src/test_sessions.cpp +++ b/test/integration_tests/src/test_sessions.cpp @@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(connect_invalid_keyspace) ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); test_utils::CassSessionPtr session(cass_session_new()); test_utils::CassFuturePtr connect_future(cass_session_connect_keyspace(session.get(), cluster.get(), "invalid")); @@ -102,7 +102,7 @@ BOOST_AUTO_TEST_CASE(close_timeout_error) ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); // Create new connections after 1 pending request cass_cluster_set_max_concurrent_requests_threshold(cluster.get(), 1); @@ -135,7 +135,7 @@ BOOST_AUTO_TEST_CASE(connect_when_already_connected) ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); test_utils::CassSessionPtr session(cass_session_new()); test_utils::CassFuturePtr connect_future1(cass_session_connect(session.get(), cluster.get())); @@ -160,7 +160,7 @@ BOOST_AUTO_TEST_CASE(close_when_already_closed) ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); test_utils::CassSessionPtr session(cass_session_new()); test_utils::CassFuturePtr 
connect_future(cass_session_connect(session.get(), cluster.get())); @@ -208,7 +208,7 @@ BOOST_AUTO_TEST_CASE(add_nodes_connect) { if (ccm->create_cluster()) { ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 3, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 3); //Add two nodes ccm->bootstrap_node(); @@ -229,7 +229,7 @@ BOOST_AUTO_TEST_CASE(add_nodes_connect) { if (ccm->create_cluster()) { ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 2, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 2); //Add two nodes ccm->bootstrap_node(); @@ -250,7 +250,7 @@ BOOST_AUTO_TEST_CASE(add_nodes_connect) { if (ccm->create_cluster()) { ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); //Add two nodes ccm->bootstrap_node(); @@ -270,7 +270,7 @@ BOOST_AUTO_TEST_CASE(add_nodes_connect) { if (ccm->create_cluster(2)) { ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 3, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 3); //Add one nodes ccm->bootstrap_node(); @@ -289,7 +289,7 @@ BOOST_AUTO_TEST_CASE(add_nodes_connect) { if (ccm->create_cluster(2)) { ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 2, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 2); //Add one nodes ccm->bootstrap_node(); @@ -569,7 +569,7 @@ BOOST_AUTO_TEST_CASE(stress) if (ccm->create_cluster()) { ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); //Open and close sessions sequentially SessionContainer sessions(cluster.get()); @@ -621,7 +621,7 @@ BOOST_AUTO_TEST_CASE(stress) if (ccm->create_cluster(3)) { ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 3, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 3); //Create sessions test_utils::CassLog::reset(SESSION_STRESS_OPENED_LOG_MESSAGE); diff --git a/test/integration_tests/src/test_ssl.cpp b/test/integration_tests/src/test_ssl.cpp index 24c1a472a..dd8961e5d 100644 --- a/test/integration_tests/src/test_ssl.cpp +++ b/test/integration_tests/src/test_ssl.cpp @@ -135,11 +135,11 @@ struct TestSSL { */ void setup(bool is_ssl = true, bool is_client_authentication = false, bool is_failure = false, unsigned int nodes = 1, unsigned int protocol_version = 2) { //Create a n-node cluster - ccm_->create_cluster(nodes, 0, is_ssl, is_client_authentication); + ccm_->create_cluster(nodes, 0, false, is_ssl, is_client_authentication); //Initialize the cpp-driver cluster_ = cass_cluster_new(); - test_utils::initialize_contact_points(cluster_, ccm_->get_ip_prefix(), nodes, 0); + test_utils::initialize_contact_points(cluster_, ccm_->get_ip_prefix(), nodes); cass_cluster_set_connect_timeout(cluster_, 10000); cass_cluster_set_request_timeout(cluster_, 10000); cass_cluster_set_num_threads_io(cluster_, 1); diff --git a/test/integration_tests/src/test_token_aware_policy.cpp b/test/integration_tests/src/test_token_aware_policy.cpp index 761d7f25a..e1ed74b52 100644 --- a/test/integration_tests/src/test_token_aware_policy.cpp +++ 
b/test/integration_tests/src/test_token_aware_policy.cpp @@ -57,7 +57,7 @@ struct TestTokenMap { void build(const std::string& ip_prefix, int num_nodes) { test_utils::CassClusterPtr cluster(cass_cluster_new()); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, num_nodes, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, num_nodes); cass_cluster_set_load_balance_round_robin(cluster.get()); cass_cluster_set_token_aware_routing(cluster.get(), cass_false); @@ -145,7 +145,7 @@ BOOST_AUTO_TEST_CASE(simple) cass_cluster_set_token_aware_routing(cluster.get(), cass_true); std::string ip_prefix = ccm->get_ip_prefix(); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); @@ -201,7 +201,7 @@ BOOST_AUTO_TEST_CASE(network_topology) cass_cluster_set_token_aware_routing(cluster.get(), cass_true); std::string ip_prefix = ccm->get_ip_prefix(); - test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1, 0); + test_utils::initialize_contact_points(cluster.get(), ip_prefix, 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); @@ -266,7 +266,7 @@ BOOST_AUTO_TEST_CASE(single_entry_routing_key) cass_cluster_set_load_balance_dc_aware(cluster.get(), "dc1", rf, cass_false); cass_cluster_set_token_aware_routing(cluster.get(), cass_true); - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); @@ -301,4 +301,99 @@ BOOST_AUTO_TEST_CASE(single_entry_routing_key) % keyspace)); } +/** + * Ensure the control connection is decoupled from the request timeout + * + * This test addresses an issue where the control connection would time out + * while rebuilding the token map, re-establish the connection, rebuild the + * token map again, and repeat indefinitely, causing high CPU load in an + * infinite loop.
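+ * + * The failure is reproduced by creating many keyspaces, some of them + * replicated to data centers that do not exist, so that the token map is + * rebuilt repeatedly while queries run with a short request timeout.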
+ * + * @since 2.4.3 + * @jira_ticket CPP-388 + * @test_category load_balancing:token_aware + * @test_category control_connection + */ +BOOST_AUTO_TEST_CASE(no_timeout_control_connection) +{ + int num_of_keyspaces = 50; + int num_of_tables = 10; + std::string keyspace_prefix = "tap_"; + std::string table_prefix = "table_"; + test_utils::CassLog::reset("Request timed out to host"); + + // Create four data centers, each with a single node + std::vector<unsigned short> data_center_nodes; + for (int i = 0; i < 4; ++i) { + data_center_nodes.push_back(1); + } + + boost::shared_ptr<CCM::Bridge> ccm(new CCM::Bridge("config.txt")); + if (ccm->create_cluster(data_center_nodes, true)) { + ccm->start_cluster(); + } + + // Create a session with a quick request timeout + test_utils::CassClusterPtr cluster(cass_cluster_new()); + cass_cluster_set_token_aware_routing(cluster.get(), cass_true); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 4); + cass_cluster_set_request_timeout(cluster.get(), 500); + test_utils::CassSessionPtr session(test_utils::create_session(cluster.get())); + + // Create keyspaces, tables, and perform selects + for (int i = 1; i <= num_of_keyspaces; ++i) { + // Randomly create keyspaces with valid and invalid data centers + bool is_valid_keyspace = true; + std::string nts_dcs = "'dc1': 1, 'dc2': 1, 'dc3': 1, 'dc4': 1"; + if ((rand() % 4) == 0) { + // Create the invalid data center network topology + int unknown_dcs = (rand() % 250) + 50; // random number in [50, 299] + for (int j = 5; j <= 4 + unknown_dcs; ++j) { + nts_dcs += ", 'dc" + boost::lexical_cast<std::string>(j) + "': 1"; + } + is_valid_keyspace = false; + }
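+ + // NOTE: The keyspaces referencing unknown data centers are intended to make + // each token map rebuild more expensive, exercising the timeout scenario + // described above.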
+ + // Create the keyspace (handling errors to avoid test failure) + CassError error_code; + do { + error_code = test_utils::execute_query_with_error(session.get(), + str(boost::format("CREATE KEYSPACE " + keyspace_prefix + "%d " + "WITH replication = { 'class': 'NetworkTopologyStrategy', " + + nts_dcs + " }") % i)); + } while (error_code != CASS_OK && error_code != CASS_ERROR_SERVER_ALREADY_EXISTS); + + // Perform table creation and random selects (only if the keyspace is valid) + if (is_valid_keyspace) { + // Create the table (handling errors to avoid test failures) + for (int j = 0; j < num_of_tables; ++j) { + std::stringstream full_table_name; + full_table_name << keyspace_prefix << i << "." + << table_prefix << j; + CassError error_code = CASS_ERROR_SERVER_READ_TIMEOUT; + do { + error_code = test_utils::execute_query_with_error(session.get(), + str(boost::format(test_utils::CREATE_TABLE_SIMPLE) % + full_table_name.str())); + } while (error_code != CASS_OK && error_code != CASS_ERROR_SERVER_ALREADY_EXISTS); + + // Randomly perform select statements on the newly created table + if (rand() % 2 == 0) { + std::string query = "SELECT * FROM " + full_table_name.str(); + do { + error_code = test_utils::execute_query_with_error(session.get(), query.c_str()); + } while (error_code != CASS_OK); + } + } + } + } + + /* + * Ensure timeouts occurred + * + * NOTE: If this point is reached, it also ensures that an infinite loop did + * not occur + */ + BOOST_REQUIRE_GT(test_utils::CassLog::message_count(), 0); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/test/integration_tests/src/test_utils.cpp b/test/integration_tests/src/test_utils.cpp index 091e5a6f5..4a1723ae3 100644 --- a/test/integration_tests/src/test_utils.cpp +++ b/test/integration_tests/src/test_utils.cpp @@ -204,16 +204,19 @@ const char ALPHA_NUMERIC[] = { "01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJK CCM::CassVersion MultipleNodesTest::version("0.0.0"); -MultipleNodesTest::MultipleNodesTest(unsigned int num_nodes_dc1, unsigned int num_nodes_dc2, unsigned int protocol_version, bool is_ssl /* = false */) +MultipleNodesTest::MultipleNodesTest(unsigned int num_nodes_dc1, + unsigned int num_nodes_dc2, unsigned int protocol_version, + bool with_vnodes /* = false */, bool is_ssl /* = false */) : ccm(new CCM::Bridge("config.txt")) { - if (ccm->create_cluster(num_nodes_dc1, num_nodes_dc2, is_ssl)) { // Only start the cluster if it wasn't the active cluster + // Only start the cluster if it wasn't the active cluster + if (ccm->create_cluster(num_nodes_dc1, num_nodes_dc2, with_vnodes, is_ssl)) { ccm->start_cluster(); } version = ccm->get_cassandra_version("config.txt"); uuid_gen = cass_uuid_gen_new(); cluster = cass_cluster_new(); - initialize_contact_points(cluster, ccm->get_ip_prefix(), num_nodes_dc1, num_nodes_dc2); + initialize_contact_points(cluster, ccm->get_ip_prefix(), num_nodes_dc1 + num_nodes_dc2); cass_cluster_set_connect_timeout(cluster, 10 * ONE_SECOND_IN_MICROS); cass_cluster_set_request_timeout(cluster, 30 * ONE_SECOND_IN_MICROS); @@ -229,8 +232,13 @@ MultipleNodesTest::~MultipleNodesTest() { cass_cluster_free(cluster); } -SingleSessionTest::SingleSessionTest(unsigned int num_nodes_dc1, unsigned int num_nodes_dc2, unsigned int protocol_version, bool is_ssl /* = false */) - : MultipleNodesTest(num_nodes_dc1, num_nodes_dc2, protocol_version, is_ssl), session(NULL), ssl(NULL) { +SingleSessionTest::SingleSessionTest(unsigned int num_nodes_dc1, + unsigned int num_nodes_dc2, unsigned int protocol_version, + bool with_vnodes /* = false */, bool is_ssl /* = false */) + : MultipleNodesTest(num_nodes_dc1, num_nodes_dc2, protocol_version, + with_vnodes, is_ssl) + , session(NULL) + , ssl(NULL) { //SSL verification flags must be set before establishing session if (!is_ssl) { create_session(); @@ -260,8 +268,9 @@ SingleSessionTest::~SingleSessionTest() { } } -void initialize_contact_points(CassCluster* cluster, std::string prefix, unsigned int num_nodes_dc1, unsigned int num_nodes_dc2) { - for (unsigned int i = 0; i < num_nodes_dc1; ++i) { +void initialize_contact_points(CassCluster* cluster, std::string prefix, + unsigned int num_of_nodes) { + for (unsigned int i = 0; i < num_of_nodes; ++i) { std::string contact_point(prefix + boost::lexical_cast<std::string>(i + 
1)); cass_cluster_set_contact_points(cluster, contact_point.c_str()); } diff --git a/test/integration_tests/src/test_utils.hpp b/test/integration_tests/src/test_utils.hpp index e84c8f080..59d5a3fe4 100644 --- a/test/integration_tests/src/test_utils.hpp +++ b/test/integration_tests/src/test_utils.hpp @@ -1111,7 +1111,9 @@ struct Value { parametrized ctor. Derive from it to use it in your tests. */ struct MultipleNodesTest { - MultipleNodesTest(unsigned int num_nodes_dc1, unsigned int num_nodes_dc2, unsigned int protocol_version = 4, bool is_ssl = false); + MultipleNodesTest(unsigned int num_nodes_dc1, unsigned int num_nodes_dc2, + unsigned int protocol_version = 4, bool with_vnodes = false, + bool is_ssl = false); virtual ~MultipleNodesTest(); boost::shared_ptr ccm; @@ -1121,7 +1123,9 @@ struct MultipleNodesTest { }; struct SingleSessionTest : public MultipleNodesTest { - SingleSessionTest(unsigned int num_nodes_dc1, unsigned int num_nodes_dc2, unsigned int protocol_version = 4, bool is_ssl = false); + SingleSessionTest(unsigned int num_nodes_dc1, unsigned int num_nodes_dc2, + unsigned int protocol_version = 4, bool with_vnodes = false, + bool is_ssl = false); virtual ~SingleSessionTest(); void create_session(); void close_session(); @@ -1130,7 +1134,8 @@ struct SingleSessionTest : public MultipleNodesTest { CassSsl* ssl; }; -void initialize_contact_points(CassCluster* cluster, std::string prefix, unsigned int num_nodes_dc1, unsigned int num_nodes_dc2); +void initialize_contact_points(CassCluster* cluster, std::string prefix, + unsigned int num_of_nodes); const char* get_value_type(CassValueType type); diff --git a/test/integration_tests/src/test_version1_downgrade.cpp b/test/integration_tests/src/test_version1_downgrade.cpp index e803c1ecd..0d6292c48 100644 --- a/test/integration_tests/src/test_version1_downgrade.cpp +++ b/test/integration_tests/src/test_version1_downgrade.cpp @@ -47,7 +47,7 @@ BOOST_AUTO_TEST_CASE(query_after_downgrade) ccm->start_cluster(); } - test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1, 0); + test_utils::initialize_contact_points(cluster.get(), ccm->get_ip_prefix(), 1); cass_cluster_set_protocol_version(cluster.get(), 2); diff --git a/test/unit_tests/src/test_load_balancing.cpp b/test/unit_tests/src/test_load_balancing.cpp index 1ed776f68..247806b72 100644 --- a/test/unit_tests/src/test_load_balancing.cpp +++ b/test/unit_tests/src/test_load_balancing.cpp @@ -26,13 +26,13 @@ #include "murmur3.hpp" #include "query_request.hpp" #include "token_aware_policy.hpp" -#include "token_map.hpp" -#include "replication_strategy.hpp" #include "whitelist_policy.hpp" #include "blacklist_policy.hpp" #include "whitelist_dc_policy.hpp" #include "blacklist_dc_policy.hpp" +#include "test_token_map_utils.hpp" + #include #include #include @@ -160,20 +160,18 @@ BOOST_AUTO_TEST_CASE(simple) { cass::RoundRobinPolicy policy; policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - // start on first elem - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); const size_t seq1[] = {1, 2}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq1)); // rotate starting element - boost::scoped_ptr qp2(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp2(policy.new_query_plan("ks", NULL, NULL, NULL)); const size_t seq2[] = {2, 1}; verify_sequence(qp2.get(), VECTOR_FROM(size_t, seq2)); // back around - boost::scoped_ptr qp3(policy.new_query_plan("ks", 
NULL, tokenMap, NULL)); + boost::scoped_ptr qp3(policy.new_query_plan("ks", NULL, NULL, NULL)); verify_sequence(qp3.get(), VECTOR_FROM(size_t, seq1)); } @@ -185,10 +183,8 @@ BOOST_AUTO_TEST_CASE(on_add) cass::RoundRobinPolicy policy; policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - // baseline - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); const size_t seq1[] = {1, 2}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq1)); @@ -197,7 +193,7 @@ BOOST_AUTO_TEST_CASE(on_add) cass::SharedRefPtr host = host_for_addr(addr_new); policy.on_add(host); - boost::scoped_ptr qp2(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp2(policy.new_query_plan("ks", NULL, NULL, NULL)); const size_t seq2[] = {2, seq_new, 1}; verify_sequence(qp2.get(), VECTOR_FROM(size_t, seq2)); } @@ -210,13 +206,11 @@ BOOST_AUTO_TEST_CASE(on_remove) cass::RoundRobinPolicy policy; policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); cass::SharedRefPtr host = hosts.begin()->second; policy.on_remove(host); - boost::scoped_ptr qp2(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp2(policy.new_query_plan("ks", NULL, NULL, NULL)); // first query plan has it // (note: not manipulating Host::state_ for dynamic removal) @@ -236,10 +230,8 @@ BOOST_AUTO_TEST_CASE(on_down_on_up) cass::RoundRobinPolicy policy; policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - - boost::scoped_ptr qp_before1(policy.new_query_plan("ks", NULL, tokenMap, NULL)); - boost::scoped_ptr qp_before2(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp_before1(policy.new_query_plan("ks", NULL, NULL, NULL)); + boost::scoped_ptr qp_before2(policy.new_query_plan("ks", NULL, NULL, NULL)); cass::SharedRefPtr host = hosts.begin()->second; policy.on_down(host); @@ -260,8 +252,8 @@ BOOST_AUTO_TEST_CASE(on_down_on_up) // host is added to the list, but not 'up' policy.on_up(host); - boost::scoped_ptr qp_after1(policy.new_query_plan("ks", NULL, tokenMap, NULL)); - boost::scoped_ptr qp_after2(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp_after1(policy.new_query_plan("ks", NULL, NULL, NULL)); + boost::scoped_ptr qp_after2(policy.new_query_plan("ks", NULL, NULL, NULL)); // 1 is dynamically excluded from plan { @@ -278,7 +270,7 @@ BOOST_AUTO_TEST_CASE(on_down_on_up) } } -BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() // round_robin_lb BOOST_AUTO_TEST_SUITE(dc_aware_lb) @@ -291,9 +283,8 @@ void test_dc_aware_policy(size_t local_count, size_t remote_count) { policy.init(cass::SharedRefPtr(), hosts); const size_t total_hosts = local_count + remote_count; - cass::TokenMap tokenMap; - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); std::vector seq(total_hosts); for (size_t i = 0; i < total_hosts; ++i) seq[i] = i + 1; verify_sequence(qp.get(), seq); @@ -317,8 +308,7 @@ BOOST_AUTO_TEST_CASE(some_dc_local_unspecified) cass::DCAwarePolicy policy(LOCAL_DC, 1, false); policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); const size_t 
seq[] = {2, 3, 1}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); @@ -334,12 +324,10 @@ BOOST_AUTO_TEST_CASE(single_local_down) cass::DCAwarePolicy policy(LOCAL_DC, 1, false); policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - - boost::scoped_ptr qp_before(policy.new_query_plan("ks", NULL, tokenMap, NULL));// has down host ptr in plan + boost::scoped_ptr qp_before(policy.new_query_plan("ks", NULL, NULL, NULL));// has down host ptr in plan target_host->set_down(); policy.on_down(target_host); - boost::scoped_ptr qp_after(policy.new_query_plan("ks", NULL, tokenMap, NULL));// should not have down host ptr in plan + boost::scoped_ptr qp_after(policy.new_query_plan("ks", NULL, NULL, NULL));// should not have down host ptr in plan { const size_t seq[] = {2, 3, 4}; @@ -362,12 +350,10 @@ BOOST_AUTO_TEST_CASE(all_local_removed_returned) cass::DCAwarePolicy policy(LOCAL_DC, 1, false); policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - - boost::scoped_ptr qp_before(policy.new_query_plan("ks", NULL, tokenMap, NULL));// has down host ptr in plan + boost::scoped_ptr qp_before(policy.new_query_plan("ks", NULL, NULL, NULL));// has down host ptr in plan target_host->set_down(); policy.on_down(target_host); - boost::scoped_ptr qp_after(policy.new_query_plan("ks", NULL, tokenMap, NULL));// should not have down host ptr in plan + boost::scoped_ptr qp_after(policy.new_query_plan("ks", NULL, NULL, NULL));// should not have down host ptr in plan { const size_t seq[] = {2}; @@ -379,7 +365,7 @@ BOOST_AUTO_TEST_CASE(all_local_removed_returned) policy.on_up(target_host); // make sure we get the local node first after on_up - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); { const size_t seq[] = {1, 2}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); @@ -397,12 +383,10 @@ BOOST_AUTO_TEST_CASE(remote_removed_returned) cass::DCAwarePolicy policy(LOCAL_DC, 1, false); policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - - boost::scoped_ptr qp_before(policy.new_query_plan("ks", NULL, tokenMap, NULL));// has down host ptr in plan + boost::scoped_ptr qp_before(policy.new_query_plan("ks", NULL, NULL, NULL));// has down host ptr in plan target_host->set_down(); policy.on_down(target_host); - boost::scoped_ptr qp_after(policy.new_query_plan("ks", NULL, tokenMap, NULL));// should not have down host ptr in plan + boost::scoped_ptr qp_after(policy.new_query_plan("ks", NULL, NULL, NULL));// should not have down host ptr in plan { const size_t seq[] = {1}; @@ -414,7 +398,7 @@ BOOST_AUTO_TEST_CASE(remote_removed_returned) policy.on_up(target_host); // make sure we get both nodes, correct order after - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); { const size_t seq[] = {1, 2}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); @@ -431,7 +415,7 @@ BOOST_AUTO_TEST_CASE(used_hosts_per_remote_dc) cass::DCAwarePolicy policy(LOCAL_DC, used_hosts, false); policy.init(cass::SharedRefPtr(), hosts); - cass::ScopedPtr qp(policy.new_query_plan("ks", NULL, cass::TokenMap(), NULL)); + cass::ScopedPtr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); size_t total_hosts = 3 + used_hosts; std::vector seq(total_hosts); for (size_t i = 0; i < total_hosts; ++i) seq[i] = i + 1; @@ -456,7 +440,7 @@ BOOST_AUTO_TEST_CASE(allow_remote_dcs_for_local_cl) 
request->set_consistency(CASS_CONSISTENCY_LOCAL_ONE); // Check for only local hosts are used - cass::ScopedPtr qp(policy.new_query_plan("ks", request.get(), cass::TokenMap(), NULL)); + cass::ScopedPtr qp(policy.new_query_plan("ks", request.get(), NULL, NULL)); const size_t seq[] = {1, 2, 3}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } @@ -472,7 +456,7 @@ BOOST_AUTO_TEST_CASE(allow_remote_dcs_for_local_cl) request->set_consistency(CASS_CONSISTENCY_LOCAL_QUORUM); // Check for only local hosts are used - cass::ScopedPtr qp(policy.new_query_plan("ks", request.get(), cass::TokenMap(), NULL)); + cass::ScopedPtr qp(policy.new_query_plan("ks", request.get(), NULL, NULL)); const size_t seq[] = {1, 2, 3, 4, 5, 6}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } @@ -487,9 +471,9 @@ BOOST_AUTO_TEST_CASE(start_with_empty_local_dc) // Set local DC using connected host { cass::DCAwarePolicy policy("", 0, false); - policy.init(hosts[cass::Address("2.0.0.0", 4092)], hosts); + policy.init(hosts[cass::Address("2.0.0.0", 9042)], hosts); - cass::ScopedPtr qp(policy.new_query_plan("ks", NULL, cass::TokenMap(), NULL)); + cass::ScopedPtr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); const size_t seq[] = {2, 3, 4}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } @@ -498,34 +482,23 @@ BOOST_AUTO_TEST_CASE(start_with_empty_local_dc) { cass::DCAwarePolicy policy("", 0, false); policy.init(cass::SharedRefPtr( - new cass::Host(cass::Address("0.0.0.0", 4092), false)), hosts); + new cass::Host(cass::Address("0.0.0.0", 9042), false)), hosts); - cass::ScopedPtr qp(policy.new_query_plan("ks", NULL, cass::TokenMap(), NULL)); + cass::ScopedPtr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); const size_t seq[] = {1}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } } -BOOST_AUTO_TEST_SUITE_END() - +BOOST_AUTO_TEST_SUITE_END() // dc_aware_lb BOOST_AUTO_TEST_SUITE(token_aware_lb) -int64_t murmur3_hash(const std::string& s) { - return cass::MurmurHash3_x64_128(s.data(), s.size(), 0); -} - BOOST_AUTO_TEST_CASE(simple) { const int64_t num_hosts = 4; cass::HostMap hosts; populate_hosts(num_hosts, "rack1", LOCAL_DC, &hosts); - cass::TokenAwarePolicy policy(new cass::RoundRobinPolicy()); - cass::TokenMap token_map; - - token_map.set_partitioner(cass::Murmur3Partitioner::PARTITIONER_CLASS); - cass::SharedRefPtr strategy(new cass::SimpleStrategy("", 3)); - token_map.set_replication_strategy("test", strategy); // Tokens // 1.0.0.0 -4611686018427387905 @@ -533,17 +506,21 @@ BOOST_AUTO_TEST_CASE(simple) // 3.0.0.0 4611686018427387901 // 4.0.0.0 9223372036854775804 + cass::ScopedPtr token_map(cass::TokenMap::from_partitioner(cass::Murmur3Partitioner::name())); + uint64_t partition_size = CASS_UINT64_MAX / num_hosts; - int64_t t = CASS_INT64_MIN + partition_size; + int64_t token = CASS_INT64_MIN + partition_size; for (cass::HostMap::iterator i = hosts.begin(); i != hosts.end(); ++i) { - std::string ts = boost::lexical_cast(t); - cass::TokenStringList tokens; - tokens.push_back(cass::StringRef(ts)); - token_map.update_host(i->second, tokens); - t += partition_size; + TokenCollectionBuilder builder; + builder.append_token(token); + token_map->add_host(i->second, builder.finish()); + token += partition_size; } - token_map.build(); + add_keyspace_simple("test", 3, token_map.get()); + token_map->build(); + + cass::TokenAwarePolicy policy(new cass::RoundRobinPolicy()); policy.init(cass::SharedRefPtr(), hosts); cass::SharedRefPtr request(new cass::QueryRequest(1)); @@ -552,7 +529,7 @@ 
BOOST_AUTO_TEST_CASE(simple) request->add_key_index(0); { - cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map, NULL)); + cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map.get(), NULL)); const size_t seq[] = { 4, 1, 2, 3 }; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } @@ -562,7 +539,7 @@ BOOST_AUTO_TEST_CASE(simple) curr_host_it->second->set_down(); { - cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map, NULL)); + cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map.get(), NULL)); const size_t seq[] = { 2, 4, 3 }; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } @@ -575,7 +552,7 @@ BOOST_AUTO_TEST_CASE(simple) curr_host_it->second->set_down(); { - cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map, NULL)); + cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map.get(), NULL)); const size_t seq[] = { 2, 1, 3 }; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } @@ -595,16 +572,6 @@ BOOST_AUTO_TEST_CASE(network_topology) } } - cass::TokenAwarePolicy policy(new cass::DCAwarePolicy(LOCAL_DC, num_hosts / 2, false)); - cass::TokenMap token_map; - - token_map.set_partitioner(cass::Murmur3Partitioner::PARTITIONER_CLASS); - cass::NetworkTopologyStrategy::DCReplicaCountMap replication_factors; - replication_factors[LOCAL_DC] = 3; - replication_factors[REMOTE_DC] = 2; - cass::SharedRefPtr strategy(new cass::NetworkTopologyStrategy("", replication_factors)); - token_map.set_replication_strategy("test", strategy); - // Tokens // 1.0.0.0 local -6588122883467697006 // 2.0.0.0 remote -3952873730080618204 @@ -614,17 +581,24 @@ BOOST_AUTO_TEST_CASE(network_topology) // 6.0.0.0 remote 6588122883467697004 // 7.0.0.0 local 9223372036854775806 + cass::ScopedPtr token_map(cass::TokenMap::from_partitioner(cass::Murmur3Partitioner::name())); + uint64_t partition_size = CASS_UINT64_MAX / num_hosts; - int64_t t = CASS_INT64_MIN + partition_size; + int64_t token = CASS_INT64_MIN + partition_size; for (cass::HostMap::iterator i = hosts.begin(); i != hosts.end(); ++i) { - std::string ts = boost::lexical_cast(t); - cass::TokenStringList tokens; - tokens.push_back(cass::StringRef(ts)); - token_map.update_host(i->second, tokens); - t += partition_size; + TokenCollectionBuilder builder; + builder.append_token(token); + token_map->add_host(i->second, builder.finish()); + token += partition_size; } - token_map.build(); + ReplicationMap replication; + replication[LOCAL_DC] = "3"; + replication[REMOTE_DC] = "2"; + add_keyspace_network_topology("test", replication, token_map.get()); + token_map->build(); + + cass::TokenAwarePolicy policy(new cass::DCAwarePolicy(LOCAL_DC, num_hosts / 2, false)); policy.init(cass::SharedRefPtr(), hosts); cass::SharedRefPtr request(new cass::QueryRequest(1)); @@ -633,7 +607,7 @@ BOOST_AUTO_TEST_CASE(network_topology) request->add_key_index(0); { - cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map, NULL)); + cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map.get(), NULL)); const size_t seq[] = { 3, 5, 7, 1, 4, 6, 2 }; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } @@ -643,7 +617,7 @@ BOOST_AUTO_TEST_CASE(network_topology) curr_host_it->second->set_down(); { - cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map, NULL)); + cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map.get(), NULL)); const size_t seq[] = { 3, 5, 7, 6, 2, 4 }; 
verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } @@ -655,13 +629,13 @@ BOOST_AUTO_TEST_CASE(network_topology) curr_host_it->second->set_down(); { - cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map, NULL)); + cass::ScopedPtr qp(policy.new_query_plan("test", request.get(), token_map.get(), NULL)); const size_t seq[] = { 5, 7, 1, 2, 4, 6 }; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq)); } } -BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() // token_aware_lb BOOST_AUTO_TEST_SUITE(latency_aware_lb) @@ -765,7 +739,7 @@ BOOST_AUTO_TEST_CASE(simple) // 1 and 4 are under the minimum, but 2 and 3 will be skipped { - cass::ScopedPtr qp(policy.new_query_plan("", NULL, cass::TokenMap(), NULL)); + cass::ScopedPtr qp(policy.new_query_plan("", NULL, NULL, NULL)); const size_t seq1[] = {1, 4, 2, 3}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq1)); } @@ -775,7 +749,7 @@ BOOST_AUTO_TEST_CASE(simple) // After waiting no hosts should be skipped (notice 2 and 3 tried first) { - cass::ScopedPtr qp(policy.new_query_plan("", NULL, cass::TokenMap(), NULL)); + cass::ScopedPtr qp(policy.new_query_plan("", NULL, NULL, NULL)); const size_t seq1[] = {2, 3, 4, 1}; verify_sequence(qp.get(), VECTOR_FROM(size_t, seq1)); } @@ -815,7 +789,7 @@ BOOST_AUTO_TEST_CASE(min_average_under_min_measured) BOOST_CHECK(policy.min_average() == -1); } -BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() // latency_aware_lb BOOST_AUTO_TEST_SUITE(whitelist_lb) @@ -830,8 +804,7 @@ BOOST_AUTO_TEST_CASE(simple) cass::WhitelistPolicy policy(new cass::RoundRobinPolicy(), whitelist_hosts); policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); // Verify only hosts 37 and 83 are computed in the query plan const size_t seq1[] = { 37, 83 }; @@ -853,8 +826,7 @@ BOOST_AUTO_TEST_CASE(dc) cass::WhitelistDCPolicy policy(new cass::RoundRobinPolicy(), whitelist_dcs); policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); // Verify only hosts LOCAL_DC and REMOTE_DC are computed in the query plan const size_t seq1[] = { 1, 2, 3, 7, 8, 9 }; @@ -864,7 +836,7 @@ BOOST_AUTO_TEST_CASE(dc) BOOST_REQUIRE(!qp.get()->compute_next(&next_address)); } -BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() // whitelist_lb BOOST_AUTO_TEST_SUITE(blacklist_lb) @@ -879,8 +851,7 @@ BOOST_AUTO_TEST_CASE(simple) cass::BlacklistPolicy policy(new cass::RoundRobinPolicy(), blacklist_hosts); policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); // Verify only hosts 1, 4 and 5 are computed in the query plan const size_t seq1[] = { 1, 4, 5 }; @@ -902,8 +873,7 @@ BOOST_AUTO_TEST_CASE(dc) cass::BlacklistDCPolicy policy(new cass::RoundRobinPolicy(), blacklist_dcs); policy.init(cass::SharedRefPtr(), hosts); - cass::TokenMap tokenMap; - boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, tokenMap, NULL)); + boost::scoped_ptr qp(policy.new_query_plan("ks", NULL, NULL, NULL)); // Verify only hosts from BACKUP_DC are computed in the query plan const size_t seq1[] = { 4, 5, 6 }; @@ -913,4 +883,4 @@ BOOST_AUTO_TEST_CASE(dc) 
BOOST_REQUIRE(!qp.get()->compute_next(&next_address)); } -BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() // blacklist_lb diff --git a/test/unit_tests/src/test_replication_strategy.cpp b/test/unit_tests/src/test_replication_strategy.cpp index f6aa7ebb2..a832e2762 100644 --- a/test/unit_tests/src/test_replication_strategy.cpp +++ b/test/unit_tests/src/test_replication_strategy.cpp @@ -18,86 +18,177 @@ # define BOOST_TEST_MODULE cassandra #endif -#include "address.hpp" -#include "host.hpp" -#include "replication_strategy.hpp" +#include "test_token_map_utils.hpp" #include #include #include #include +#include -static cass::SharedRefPtr<cass::Host> create_host(const std::string& ip, - const std::string& rack = "", - const std::string& dc = "") { - cass::SharedRefPtr<cass::Host> host = - cass::SharedRefPtr<cass::Host>(new cass::Host(cass::Address(ip, 4092), false)); - host->set_rack_and_dc(rack, dc); - return host; -} +namespace { + +static const cass::CopyOnWriteHostVec NO_REPLICAS(NULL); + +template <class Partitioner> +struct MockTokenMap { + typedef typename cass::ReplicationStrategy<Partitioner>::Token Token; + typedef typename cass::ReplicationStrategy<Partitioner>::TokenHost TokenHost; + typedef typename cass::ReplicationStrategy<Partitioner>::TokenHostVec TokenHostVec; + typedef typename cass::ReplicationStrategy<Partitioner>::TokenReplicas TokenReplicas; + typedef typename cass::ReplicationStrategy<Partitioner>::TokenReplicasVec TokenReplicasVec; + + struct TokenReplicasCompare { + bool operator()(const TokenReplicas& lhs, const TokenReplicas& rhs) const { + return lhs.first < rhs.first; + } + }; + + cass::HostSet hosts; + cass::IdGenerator dc_ids; + cass::IdGenerator rack_ids; + + cass::ReplicationStrategy<Partitioner> strategy; + TokenHostVec tokens; + TokenReplicasVec replicas; + cass::DatacenterMap datacenters; + + void init_simple_strategy(size_t replication_factor) { + cass::DataType::ConstPtr varchar_data_type(new cass::DataType(CASS_VALUE_TYPE_VARCHAR)); + + ColumnMetadataVec column_metadata; + column_metadata.push_back(ColumnMetadata("keyspace_name", varchar_data_type)); + column_metadata.push_back(ColumnMetadata("replication", cass::CollectionType::map(varchar_data_type, varchar_data_type, true))); + RowResultResponseBuilder builder(column_metadata); + + ReplicationMap replication; + replication["class"] = CASS_SIMPLE_STRATEGY; + + std::stringstream ss; + ss << replication_factor; + replication["replication_factor"] = ss.str(); + + builder.append_keyspace_row_v3("ks1", replication); + builder.finish(); + + cass::ResultIterator iterator(builder.finish()); + BOOST_CHECK(iterator.next()); + strategy.init(dc_ids, cass::VersionNumber(3, 0, 0), iterator.row()); + } + + void init_network_topology_strategy(ReplicationMap& replication) { + cass::DataType::ConstPtr varchar_data_type(new cass::DataType(CASS_VALUE_TYPE_VARCHAR)); + + ColumnMetadataVec column_metadata; + column_metadata.push_back(ColumnMetadata("keyspace_name", varchar_data_type)); + column_metadata.push_back(ColumnMetadata("replication", cass::CollectionType::map(varchar_data_type, varchar_data_type, true))); + RowResultResponseBuilder builder(column_metadata); + + replication["class"] = CASS_NETWORK_TOPOLOGY_STRATEGY; + builder.append_keyspace_row_v3("ks1", replication); + builder.finish(); + + cass::ResultIterator iterator(builder.finish()); + BOOST_CHECK(iterator.next()); + strategy.init(dc_ids, cass::VersionNumber(3, 0, 0), iterator.row()); + } + + void add_token(Token token, + const std::string& address, + const std::string& rack = "", + const std::string& dc = "") { + tokens.push_back(TokenHost(token, create_host(address, rack, dc))); + }
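+ + // Typical usage, mirroring the test cases below: initialize a strategy, add + // a token for each host, build, then look up the replicas for a token: + // + //   MockTokenMap<cass::Murmur3Partitioner> token_map; + //   token_map.init_simple_strategy(3); + //   token_map.add_token(0, "1.0.0.1"); + //   token_map.build_replicas(); + //   const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(0);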
+ void build_replicas() { + std::sort(tokens.begin(), tokens.end()); // We assume sorted tokens + cass::build_datacenters(hosts, datacenters); + strategy.build_replicas(tokens, datacenters, replicas); + } + + const cass::CopyOnWriteHostVec& find_hosts(Token token) { + typename TokenReplicasVec::const_iterator i = std::lower_bound(replicas.begin(), replicas.end(), + TokenReplicas(token, NO_REPLICAS), + TokenReplicasCompare()); + if (i != replicas.end() && i->first == token) { + return i->second; + } + return NO_REPLICAS; + } + + cass::Host* create_host(const std::string& address, + const std::string& rack = "", + const std::string& dc = "") { + cass::Host::Ptr host(new cass::Host(cass::Address(address, 9042), false)); + host->set_rack_and_dc(rack, dc); + host->set_rack_and_dc_ids(rack_ids.get(rack), dc_ids.get(dc)); + cass::HostSet::iterator i = hosts.find(host); + if (i != hosts.end()) { + return i->get(); + } else { + hosts.insert(host); + return host.get(); + } + } +}; void check_host(const cass::SharedRefPtr<cass::Host>& host, const std::string& ip, const std::string& rack = "", const std::string& dc = "") { - BOOST_CHECK(host->address() == cass::Address(ip, 4092)); + BOOST_CHECK(host->address() == cass::Address(ip, 9042)); BOOST_CHECK(host->rack() == rack); BOOST_CHECK(host->dc() == dc); } +} // namespace + BOOST_AUTO_TEST_SUITE(replication_strategy) BOOST_AUTO_TEST_CASE(simple) { - cass::SimpleStrategy strategy("SimpleStrategy", 3); + MockTokenMap<cass::Murmur3Partitioner> token_map; - cass::TokenHostMap primary; + token_map.init_simple_strategy(3); - cass::Token t1(1, 'f'); - cass::Token t2(1, 'l'); - cass::Token t3(1, 'r'); - cass::Token t4(1, 'z'); + MockTokenMap<cass::Murmur3Partitioner>::Token t1 = 0; + MockTokenMap<cass::Murmur3Partitioner>::Token t2 = 100; + MockTokenMap<cass::Murmur3Partitioner>::Token t3 = 200; + MockTokenMap<cass::Murmur3Partitioner>::Token t4 = 300; - primary[t1] = create_host("1.0.0.1"); - primary[t2] = create_host("1.0.0.2"); - primary[t3] = create_host("1.0.0.3"); - primary[t4] = create_host("1.0.0.4"); + token_map.add_token(t1, "1.0.0.1"); + token_map.add_token(t2, "1.0.0.2"); + token_map.add_token(t3, "1.0.0.3"); + token_map.add_token(t4, "1.0.0.4"); - cass::TokenReplicaMap replicas; - strategy.tokens_to_replicas(primary, &replicas); + token_map.build_replicas(); { - BOOST_REQUIRE(replicas.count(t1) > 0); - const cass::CopyOnWriteHostVec& hosts = replicas.find(t1)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t1); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "1.0.0.1"); check_host((*hosts)[1], "1.0.0.2"); check_host((*hosts)[2], "1.0.0.3"); } { - BOOST_REQUIRE(replicas.count(t2) > 0); - const cass::CopyOnWriteHostVec& hosts = replicas.find(t2)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t2); check_host((*hosts)[0], "1.0.0.2"); check_host((*hosts)[1], "1.0.0.3"); check_host((*hosts)[2], "1.0.0.4"); } { - BOOST_REQUIRE(replicas.count(t3) > 0); - const cass::CopyOnWriteHostVec& hosts = replicas.find(t3)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t3); check_host((*hosts)[0], "1.0.0.3"); check_host((*hosts)[1], "1.0.0.4"); check_host((*hosts)[2], "1.0.0.1"); } { - BOOST_REQUIRE(replicas.count(t4) > 0); - const cass::CopyOnWriteHostVec& hosts = replicas.find(t4)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t4); check_host((*hosts)[0], "1.0.0.4"); check_host((*hosts)[1], "1.0.0.1"); check_host((*hosts)[2], 
"1.0.0.2"); @@ -106,40 +197,39 @@ BOOST_AUTO_TEST_CASE(simple) BOOST_AUTO_TEST_CASE(network_topology) { - cass::NetworkTopologyStrategy::DCReplicaCountMap dc_replicas; - dc_replicas["dc1"] = 2; - dc_replicas["dc2"] = 2; + MockTokenMap token_map; - cass::NetworkTopologyStrategy strategy("NetworkTopologyStrategy", dc_replicas); + ReplicationMap replication; + replication["dc1"] = "2"; + replication["dc2"] = "2"; - cass::TokenHostMap primary; + token_map.init_network_topology_strategy(replication); - cass::Token t1(1, 'c'); - cass::Token t2(1, 'f'); - cass::Token t3(1, 'i'); - cass::Token t4(1, 'l'); + MockTokenMap::Token t1 = 0; + MockTokenMap::Token t2 = 100; + MockTokenMap::Token t3 = 200; + MockTokenMap::Token t4 = 300; - primary[t1] = create_host("1.0.0.1", "rack1", "dc1"); - primary[t2] = create_host("1.0.0.2", "rack1", "dc1"); - primary[t3] = create_host("1.0.0.3", "rack2", "dc1"); - primary[t4] = create_host("1.0.0.4", "rack2", "dc1"); + token_map.add_token(t1, "1.0.0.1", "rack1", "dc1"); + token_map.add_token(t2, "1.0.0.2", "rack1", "dc1"); + token_map.add_token(t3, "1.0.0.3", "rack2", "dc1"); + token_map.add_token(t4, "1.0.0.4", "rack2", "dc1"); - cass::Token t5(1, 'o'); - cass::Token t6(1, 'r'); - cass::Token t7(1, 'u'); - cass::Token t8(1, 'z'); + MockTokenMap::Token t5 = 400; + MockTokenMap::Token t6 = 500; + MockTokenMap::Token t7 = 600; + MockTokenMap::Token t8 = 700; - primary[t5] = create_host("2.0.0.1", "rack1", "dc2"); - primary[t6] = create_host("2.0.0.2", "rack1", "dc2"); - primary[t7] = create_host("2.0.0.3", "rack2", "dc2"); - primary[t8] = create_host("2.0.0.4", "rack2", "dc2"); + token_map.add_token(t5, "2.0.0.1", "rack1", "dc2"); + token_map.add_token(t6, "2.0.0.2", "rack1", "dc2"); + token_map.add_token(t7, "2.0.0.3", "rack2", "dc2"); + token_map.add_token(t8, "2.0.0.4", "rack2", "dc2"); - cass::TokenReplicaMap replicas; - strategy.tokens_to_replicas(primary, &replicas); + token_map.build_replicas(); { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t1)->second; - BOOST_REQUIRE(hosts->size() == 4); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t1); + BOOST_REQUIRE(hosts && hosts->size() == 4); check_host((*hosts)[0], "1.0.0.1", "rack1", "dc1"); check_host((*hosts)[1], "1.0.0.3", "rack2", "dc1"); check_host((*hosts)[2], "2.0.0.1", "rack1", "dc2"); @@ -147,8 +237,8 @@ BOOST_AUTO_TEST_CASE(network_topology) } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t2)->second; - BOOST_REQUIRE(hosts->size() == 4); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t2); + BOOST_REQUIRE(hosts && hosts->size() == 4); check_host((*hosts)[0], "1.0.0.2", "rack1", "dc1"); check_host((*hosts)[1], "1.0.0.3", "rack2", "dc1"); check_host((*hosts)[2], "2.0.0.1", "rack1", "dc2"); @@ -156,8 +246,8 @@ BOOST_AUTO_TEST_CASE(network_topology) } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t3)->second; - BOOST_REQUIRE(hosts->size() == 4); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t3); + BOOST_REQUIRE(hosts && hosts->size() == 4); check_host((*hosts)[0], "1.0.0.3", "rack2", "dc1"); check_host((*hosts)[1], "2.0.0.1", "rack1", "dc2"); check_host((*hosts)[2], "2.0.0.3", "rack2", "dc2"); @@ -165,8 +255,8 @@ BOOST_AUTO_TEST_CASE(network_topology) } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t4)->second; - BOOST_REQUIRE(hosts->size() == 4); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t4); + BOOST_REQUIRE(hosts && hosts->size() == 4); check_host((*hosts)[0], "1.0.0.4", "rack2", "dc1"); 
check_host((*hosts)[1], "2.0.0.1", "rack1", "dc2"); check_host((*hosts)[2], "2.0.0.3", "rack2", "dc2"); @@ -174,8 +264,8 @@ BOOST_AUTO_TEST_CASE(network_topology) } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t5)->second; - BOOST_REQUIRE(hosts->size() == 4); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t5); + BOOST_REQUIRE(hosts && hosts->size() == 4); check_host((*hosts)[0], "2.0.0.1", "rack1", "dc2"); check_host((*hosts)[1], "2.0.0.3", "rack2", "dc2"); check_host((*hosts)[2], "1.0.0.1", "rack1", "dc1"); @@ -183,8 +273,8 @@ BOOST_AUTO_TEST_CASE(network_topology) } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t6)->second; - BOOST_REQUIRE(hosts->size() == 4); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t6); + BOOST_REQUIRE(hosts && hosts->size() == 4); check_host((*hosts)[0], "2.0.0.2", "rack1", "dc2"); check_host((*hosts)[1], "2.0.0.3", "rack2", "dc2"); check_host((*hosts)[2], "1.0.0.1", "rack1", "dc1"); @@ -192,8 +282,8 @@ BOOST_AUTO_TEST_CASE(network_topology) } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t7)->second; - BOOST_REQUIRE(hosts->size() == 4); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t7); + BOOST_REQUIRE(hosts && hosts->size() == 4); check_host((*hosts)[0], "2.0.0.3", "rack2", "dc2"); check_host((*hosts)[1], "1.0.0.1", "rack1", "dc1"); check_host((*hosts)[2], "1.0.0.3", "rack2", "dc1"); @@ -201,8 +291,8 @@ BOOST_AUTO_TEST_CASE(network_topology) } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t8)->second; - BOOST_REQUIRE(hosts->size() == 4); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t8); + BOOST_REQUIRE(hosts && hosts->size() == 4); check_host((*hosts)[0], "2.0.0.4", "rack2", "dc2"); check_host((*hosts)[1], "1.0.0.1", "rack1", "dc1"); check_host((*hosts)[2], "1.0.0.3", "rack2", "dc1"); @@ -212,76 +302,75 @@ BOOST_AUTO_TEST_CASE(network_topology) BOOST_AUTO_TEST_CASE(network_topology_same_rack) { - cass::NetworkTopologyStrategy::DCReplicaCountMap dc_replicas; - dc_replicas["dc1"] = 2; - dc_replicas["dc2"] = 1; + MockTokenMap token_map; - cass::NetworkTopologyStrategy strategy("NetworkTopologyStrategy", dc_replicas); + ReplicationMap replication; + replication["dc1"] = "2"; + replication["dc2"] = "1"; - cass::TokenHostMap primary; + token_map.init_network_topology_strategy(replication); - cass::Token t1(1, 'd'); - cass::Token t2(1, 'h'); - cass::Token t3(1, 'l'); + MockTokenMap::Token t1 = 100; + MockTokenMap::Token t2 = 200; + MockTokenMap::Token t3 = 300; - primary[t1] = create_host("1.0.0.1", "rack1", "dc1"); - primary[t2] = create_host("1.0.0.2", "rack1", "dc1"); - primary[t3] = create_host("1.0.0.3", "rack1", "dc1"); + token_map.add_token(t1, "1.0.0.1", "rack1", "dc1"); + token_map.add_token(t2, "1.0.0.2", "rack1", "dc1"); + token_map.add_token(t3, "1.0.0.3", "rack1", "dc1"); - cass::Token t4(1, 'p'); - cass::Token t5(1, 't'); - cass::Token t6(1, 'z'); + MockTokenMap::Token t4 = 400; + MockTokenMap::Token t5 = 500; + MockTokenMap::Token t6 = 600; - primary[t4] = create_host("2.0.0.1", "rack1", "dc2"); - primary[t5] = create_host("2.0.0.2", "rack1", "dc2"); - primary[t6] = create_host("2.0.0.3", "rack1", "dc2"); + token_map.add_token(t4, "2.0.0.1", "rack1", "dc2"); + token_map.add_token(t5, "2.0.0.2", "rack1", "dc2"); + token_map.add_token(t6, "2.0.0.3", "rack1", "dc2"); - cass::TokenReplicaMap replicas; - strategy.tokens_to_replicas(primary, &replicas); + token_map.build_replicas(); { - const cass::CopyOnWriteHostVec& hosts = 
replicas.find(t1)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t1); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "1.0.0.1", "rack1", "dc1"); check_host((*hosts)[1], "1.0.0.2", "rack1", "dc1"); check_host((*hosts)[2], "2.0.0.1", "rack1", "dc2"); } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t2)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t2); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "1.0.0.2", "rack1", "dc1"); check_host((*hosts)[1], "1.0.0.3", "rack1", "dc1"); check_host((*hosts)[2], "2.0.0.1", "rack1", "dc2"); } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t3)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t3); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "1.0.0.3", "rack1", "dc1"); check_host((*hosts)[1], "2.0.0.1", "rack1", "dc2"); check_host((*hosts)[2], "1.0.0.1", "rack1", "dc1"); } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t4)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t4); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "2.0.0.1", "rack1", "dc2"); check_host((*hosts)[1], "1.0.0.1", "rack1", "dc1"); check_host((*hosts)[2], "1.0.0.2", "rack1", "dc1"); } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t5)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t5); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "2.0.0.2", "rack1", "dc2"); check_host((*hosts)[1], "1.0.0.1", "rack1", "dc1"); check_host((*hosts)[2], "1.0.0.2", "rack1", "dc1"); } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t6)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t6); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "2.0.0.3", "rack1", "dc2"); check_host((*hosts)[1], "1.0.0.1", "rack1", "dc1"); check_host((*hosts)[2], "1.0.0.2", "rack1", "dc1"); @@ -290,53 +379,52 @@ BOOST_AUTO_TEST_CASE(network_topology_same_rack) BOOST_AUTO_TEST_CASE(network_topology_not_enough_racks) { - cass::NetworkTopologyStrategy::DCReplicaCountMap dc_replicas; - dc_replicas["dc1"] = 3; + MockTokenMap token_map; - cass::NetworkTopologyStrategy strategy("NetworkTopologyStrategy", dc_replicas); + ReplicationMap replication; + replication["dc1"] = "3"; - cass::TokenHostMap primary; + token_map.init_network_topology_strategy(replication); - cass::Token t1(1, 'd'); - cass::Token t2(1, 'h'); - cass::Token t3(1, 'l'); - cass::Token t4(1, 'p'); + MockTokenMap::Token t1 = 100; + MockTokenMap::Token t2 = 200; + MockTokenMap::Token t3 = 300; + MockTokenMap::Token t4 = 400; - primary[t1] = create_host("1.0.0.1", "rack1", "dc1"); - primary[t2] = create_host("1.0.0.2", "rack1", "dc1"); - primary[t3] = create_host("1.0.0.3", "rack1", "dc1"); - primary[t4] = create_host("1.0.0.4", "rack2", "dc1"); + token_map.add_token(t1, "1.0.0.1", "rack1", "dc1"); + token_map.add_token(t2, "1.0.0.2", "rack1", "dc1"); + token_map.add_token(t3, "1.0.0.3", "rack1", "dc1"); + token_map.add_token(t4, "1.0.0.4", "rack2", "dc1"); - cass::TokenReplicaMap replicas; - strategy.tokens_to_replicas(primary, &replicas); + token_map.build_replicas(); { - const cass::CopyOnWriteHostVec& hosts = 
replicas.find(t1)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t1); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "1.0.0.1", "rack1", "dc1"); check_host((*hosts)[1], "1.0.0.4", "rack2", "dc1"); check_host((*hosts)[2], "1.0.0.2", "rack1", "dc1"); } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t2)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t2); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "1.0.0.2", "rack1", "dc1"); check_host((*hosts)[1], "1.0.0.4", "rack2", "dc1"); check_host((*hosts)[2], "1.0.0.3", "rack1", "dc1"); } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t3)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t3); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "1.0.0.3", "rack1", "dc1"); check_host((*hosts)[1], "1.0.0.4", "rack2", "dc1"); check_host((*hosts)[2], "1.0.0.1", "rack1", "dc1"); } { - const cass::CopyOnWriteHostVec& hosts = replicas.find(t4)->second; - BOOST_REQUIRE(hosts->size() == 3); + const cass::CopyOnWriteHostVec& hosts = token_map.find_hosts(t4); + BOOST_REQUIRE(hosts && hosts->size() == 3); check_host((*hosts)[0], "1.0.0.4", "rack2", "dc1"); check_host((*hosts)[1], "1.0.0.1", "rack1", "dc1"); check_host((*hosts)[2], "1.0.0.2", "rack1", "dc1"); diff --git a/test/unit_tests/src/test_token_map.cpp b/test/unit_tests/src/test_token_map.cpp index cbe65644c..9a82c606f 100644 --- a/test/unit_tests/src/test_token_map.cpp +++ b/test/unit_tests/src/test_token_map.cpp @@ -18,92 +18,79 @@ # define BOOST_TEST_MODULE cassandra #endif -#include "address.hpp" -#include "constants.hpp" -#include "md5.hpp" -#include "murmur3.hpp" -#include "token_map.hpp" - -#include -#include -#include +#include "test_token_map_utils.hpp" + #include -#include -cass::SharedRefPtr<cass::Host> create_host(const std::string& ip) { - return cass::SharedRefPtr<cass::Host>(new cass::Host(cass::Address(ip, 4092), false)); -} +namespace { -template <class HashType> +template <class Partitioner> struct TestTokenMap { - typedef HashType(HashFunc)(const std::string& s); - typedef std::map<HashType, cass::SharedRefPtr<cass::Host> > TokenHostMap; + typedef typename cass::ReplicationStrategy<Partitioner>::Token Token; + typedef std::map<Token, cass::Host::Ptr> TokenHostMap; TokenHostMap tokens; - cass::TokenMap token_map; - cass::SharedRefPtr<cass::ReplicationStrategy> strategy; - - void build(const std::string& partitioner, - const std::string& ks_name) { - token_map.set_partitioner(partitioner); - - if (!strategy) { - strategy = cass::SharedRefPtr<cass::ReplicationStrategy>( - new cass::NonReplicatedStrategy("")); + cass::ScopedPtr<cass::TokenMap> token_map; + + TestTokenMap() + : token_map(cass::TokenMap::from_partitioner(Partitioner::name())) { } + + void build(const std::string& keyspace_name = "ks", size_t replication_factor = 3) { + add_keyspace_simple(keyspace_name, replication_factor, token_map.get()); + for (typename TokenHostMap::const_iterator i = tokens.begin(), + end = tokens.end(); i != end; ++i) { + TokenCollectionBuilder builder; + builder.append_token(i->first); + token_map->add_host(i->second, builder.finish()); } + token_map->build(); + } - token_map.set_replication_strategy(ks_name, strategy); - - for (typename TokenHostMap::iterator i = tokens.begin(); i != tokens.end(); ++i) { - cass::TokenStringList tokens; - std::string token(boost::lexical_cast<std::string>(i->first)); - tokens.push_back(token); - token_map.update_host(i->second, tokens); + const cass::Host::Ptr& get_replica(const std::string& key) { + typename 
TokenHostMap::const_iterator i = tokens.upper_bound(Partitioner::hash(key)); + if (i != tokens.end()) { + return i->second; + } else { + return tokens.begin()->second; } - - token_map.build(); } - void verify(HashFunc hash_func, const std::string& ks_name) { - for (int i = 0; i < 24; ++i) { - std::string value(1, 'a' + i); - const cass::CopyOnWriteHostVec& replicas - = token_map.get_replicas(ks_name, value); + void verify(const std::string& keyspace_name = "ks") { + const std::string keys[] = { "test", "abc", "def", "a", "b", "c", "d" }; - HashType hash = hash_func(value); - typename TokenHostMap::iterator token = tokens.upper_bound(hash); + for (size_t i = 0; i < sizeof(keys)/sizeof(keys[0]); ++i) { + const std::string& key = keys[i]; - if (token != tokens.end()) { - BOOST_CHECK(replicas->front() == token->second); - } else { - BOOST_CHECK(replicas->front() == tokens.begin()->second); - } + const cass::CopyOnWriteHostVec& hosts = token_map->get_replicas(keyspace_name, key); + BOOST_REQUIRE(hosts && hosts->size() > 0); + + const cass::Host::Ptr& host = get_replica(key); + BOOST_REQUIRE(host); + + BOOST_CHECK_EQUAL(hosts->front()->address(), host->address()); } } }; -BOOST_AUTO_TEST_SUITE(token_map) +} // namespace -int64_t murmur3_hash(const std::string& s) { - return cass::MurmurHash3_x64_128(s.data(), s.size(), 0); -} +BOOST_AUTO_TEST_SUITE(token_map) BOOST_AUTO_TEST_CASE(murmur3) { - TestTokenMap test_murmur3; + TestTokenMap test_murmur3; test_murmur3.tokens[CASS_INT64_MIN / 2] = create_host("1.0.0.1"); - test_murmur3.tokens[0] = create_host("1.0.0.2"); + test_murmur3.tokens[0] = create_host("1.0.0.2"); test_murmur3.tokens[CASS_INT64_MAX / 2] = create_host("1.0.0.3"); - // Anything greater than the last host should be wrapped around to host1 - test_murmur3.build(cass::Murmur3Partitioner::PARTITIONER_CLASS, "test"); - test_murmur3.verify(murmur3_hash, "test"); + test_murmur3.build(); + test_murmur3.verify(); } BOOST_AUTO_TEST_CASE(murmur3_multiple_tokens_per_host) { - TestTokenMap test_murmur3; + TestTokenMap test_murmur3; const size_t tokens_per_host = 256; @@ -113,155 +100,271 @@ BOOST_AUTO_TEST_CASE(murmur3_multiple_tokens_per_host) hosts.push_back(create_host("1.0.0.3")); hosts.push_back(create_host("1.0.0.4")); - boost::mt19937_64 ng; + MT19937_64 rng; for (cass::HostVec::iterator i = hosts.begin(); i != hosts.end(); ++i) { for (size_t j = 0; j < tokens_per_host; ++j) { - int64_t t = static_cast(ng()); - test_murmur3.tokens[t] = *i; + test_murmur3.tokens[rng()] = *i; } } - test_murmur3.build(cass::Murmur3Partitioner::PARTITIONER_CLASS, "test"); - test_murmur3.verify(murmur3_hash, "test"); + test_murmur3.build(); + test_murmur3.verify(); } -boost::multiprecision::int128_t random_hash(const std::string& s) { - cass::Md5 m; - m.update(reinterpret_cast(s.data()), s.size()); - uint8_t h[16]; - m.final(h); - std::string hex("0x"); - for (int i = 0; i < 16; ++i) { - char buf[4]; - sprintf(buf, "%02X", h[i]); - hex.append(buf); +BOOST_AUTO_TEST_CASE(murmur3_large_number_of_vnodes) +{ + TestTokenMap test_murmur3; + + size_t num_dcs = 3; + size_t num_racks = 3; + size_t num_hosts = 4; + size_t num_vnodes = 256; + size_t replication_factor = 3; + + ReplicationMap replication; + MT19937_64 rng; + cass::TokenMap* token_map = test_murmur3.token_map.get(); + TestTokenMap::TokenHostMap& tokens = test_murmur3.tokens; + + // Populate tokens + int host_count = 1; + for (size_t i = 1; i <= num_dcs; ++i) { + char dc[32]; + sprintf(dc, "dc%d", (int)i); + char rf[32]; + sprintf(rf, "%d", 
(int)replication_factor); + replication[dc] = rf; + + for (size_t j = 1; j <= num_racks; ++j) { + char rack[32]; + sprintf(rack, "rack%d", (int)j); + + for (size_t k = 1; k <= num_hosts; ++k) { + char ip[32]; + sprintf(ip, "127.0.%d.%d", host_count / 255, host_count % 255); + host_count++; + + cass::Host::Ptr host(create_host(ip, rack, dc)); + + TokenCollectionBuilder builder; + for (size_t i = 0; i < num_vnodes; ++i) { + cass::Murmur3Partitioner::Token token = rng(); + builder.append_token(token); + tokens[token] = host; + } + token_map->add_host(host, builder.finish()); + } + } + } + + // Build token map + add_keyspace_network_topology("ks1", replication, token_map); + token_map->build(); + + const std::string keys[] = { "test", "abc", "def", "a", "b", "c", "d" }; + + for (size_t i = 0; i < sizeof(keys)/sizeof(keys[0]); ++i) { + const std::string& key = keys[i]; + + const cass::CopyOnWriteHostVec& hosts = token_map->get_replicas("ks1", key); + BOOST_REQUIRE(hosts && hosts->size() == replication_factor * num_dcs); + + typedef std::map > DcRackMap; + + // Verify rack counts + DcRackMap dc_racks; + for (cass::HostVec::const_iterator i = hosts->begin(), + end = hosts->end(); i != end; ++i) { + const cass::Host::Ptr& host = (*i); + dc_racks[host->dc()].insert(host->rack()); + } + BOOST_CHECK(dc_racks.size() == num_dcs); + + for (DcRackMap::const_iterator i = dc_racks.begin(), + end = dc_racks.end(); i != end; ++i) { + BOOST_CHECK(i->second.size() >= std::min(num_racks, replication_factor)); + } + + // Verify replica + cass::Host::Ptr host = test_murmur3.get_replica(key); + BOOST_REQUIRE(host); + + BOOST_CHECK_EQUAL((*hosts)[0]->address(), host->address()); } - return boost::multiprecision::int128_t(hex); } BOOST_AUTO_TEST_CASE(random) { - TestTokenMap test_random; + cass::ScopedPtr token_map(cass::TokenMap::from_partitioner(cass::RandomPartitioner::name())); - test_random.tokens[boost::multiprecision::int128_t("42535295865117307932921825928971026432")] = create_host("1.0.0.1"); // 2^127 / 4 - test_random.tokens[boost::multiprecision::int128_t("85070591730234615865843651857942052864")] = create_host("1.0.0.2"); // 2^127 / 2 - test_random.tokens[boost::multiprecision::int128_t("1605887595351923798765477786913079296")] = create_host("1.0.0.3"); // 2^127 * 3 / 4 - // Anything greater than the last host should be wrapped around to host1 + TestTokenMap test_random; - test_random.build(cass::RandomPartitioner::PARTITIONER_CLASS, "test"); - test_random.verify(random_hash, "test"); -} + test_random.tokens[create_random_token("42535295865117307932921825928971026432")] = create_host("1.0.0.1"); // 2^127 / 4 + test_random.tokens[create_random_token("85070591730234615865843651857942052864")] = create_host("1.0.0.2"); // 2^127 / 2 + test_random.tokens[create_random_token("1605887595351923798765477786913079296")] = create_host("1.0.0.3"); // 2^127 * 3 / 4 -std::string byte_ordered_hash(const std::string& s) { - return s; + test_random.build(); + test_random.verify(); } BOOST_AUTO_TEST_CASE(byte_ordered) { - TestTokenMap test_byte_ordered; + cass::ScopedPtr token_map(cass::TokenMap::from_partitioner(cass::ByteOrderedPartitioner::name())); + + TestTokenMap test_byte_ordered; - test_byte_ordered.tokens["g"] = create_host("1.0.0.1"); - test_byte_ordered.tokens["m"] = create_host("1.0.0.2"); - test_byte_ordered.tokens["s"] = create_host("1.0.0.3"); - // Anything greater than the last host should be wrapped around to host1 + test_byte_ordered.tokens[create_byte_ordered_token("g")] = create_host("1.0.0.1"); + 
 
 BOOST_AUTO_TEST_CASE(byte_ordered)
 {
-  TestTokenMap<std::string> test_byte_ordered;
+  cass::ScopedPtr<cass::TokenMap> token_map(cass::TokenMap::from_partitioner(cass::ByteOrderedPartitioner::name()));
+
+  TestTokenMap<cass::ByteOrderedPartitioner> test_byte_ordered;
 
-  test_byte_ordered.tokens["g"] = create_host("1.0.0.1");
-  test_byte_ordered.tokens["m"] = create_host("1.0.0.2");
-  test_byte_ordered.tokens["s"] = create_host("1.0.0.3");
-  // Anything greater than the last host should be wrapped around to host1
+  test_byte_ordered.tokens[create_byte_ordered_token("g")] = create_host("1.0.0.1");
+  test_byte_ordered.tokens[create_byte_ordered_token("m")] = create_host("1.0.0.2");
+  test_byte_ordered.tokens[create_byte_ordered_token("s")] = create_host("1.0.0.3");
 
-  test_byte_ordered.build(cass::ByteOrderedPartitioner::PARTITIONER_CLASS, "test");
-  test_byte_ordered.verify(byte_ordered_hash, "test");
+  test_byte_ordered.build();
+  test_byte_ordered.verify();
 }
 
 BOOST_AUTO_TEST_CASE(remove_host)
 {
-  TestTokenMap<int64_t> test_remove_host;
-
-  test_remove_host.strategy =
-      cass::SharedRefPtr<cass::ReplicationStrategy>(new cass::SimpleStrategy("", 2));
+  TestTokenMap<cass::Murmur3Partitioner> test_remove_host;
 
   test_remove_host.tokens[CASS_INT64_MIN / 2] = create_host("1.0.0.1");
-  test_remove_host.tokens[0] = create_host("1.0.0.2");
+  test_remove_host.tokens[0]                  = create_host("1.0.0.2");
   test_remove_host.tokens[CASS_INT64_MAX / 2] = create_host("1.0.0.3");
 
-  test_remove_host.build(cass::Murmur3Partitioner::PARTITIONER_CLASS, "test");
+  test_remove_host.build("ks", 2);
+  test_remove_host.verify();
 
-  cass::TokenMap& token_map = test_remove_host.token_map;
+  cass::TokenMap* token_map = test_remove_host.token_map.get();
 
   {
-    const cass::CopyOnWriteHostVec& replicas
-        = token_map.get_replicas("test", "abc");
+    const cass::CopyOnWriteHostVec& replicas = token_map->get_replicas("ks", "abc");
 
-    BOOST_REQUIRE(replicas->size() == 2);
-    BOOST_CHECK((*replicas)[0]->address() == cass::Address("1.0.0.1", 9042));
-    BOOST_CHECK((*replicas)[1]->address() == cass::Address("1.0.0.2", 9042));
+    BOOST_REQUIRE(replicas && replicas->size() == 2);
+    BOOST_CHECK_EQUAL((*replicas)[0]->address(), cass::Address("1.0.0.1", 9042));
+    BOOST_CHECK_EQUAL((*replicas)[1]->address(), cass::Address("1.0.0.2", 9042));
   }
 
-  TestTokenMap<int64_t>::TokenHostMap::iterator host_to_remove_it = test_remove_host.tokens.begin();
+  TestTokenMap<cass::Murmur3Partitioner>::TokenHostMap::iterator host_to_remove_it = test_remove_host.tokens.begin();
 
-  token_map.remove_host(host_to_remove_it->second);
+  token_map->remove_host_and_build(host_to_remove_it->second);
 
   {
-    const cass::CopyOnWriteHostVec& replicas
-        = token_map.get_replicas("test", "abc");
+    const cass::CopyOnWriteHostVec& replicas = token_map->get_replicas("ks", "abc");
 
-    BOOST_REQUIRE(replicas->size() == 2);
-    BOOST_CHECK((*replicas)[0]->address() == cass::Address("1.0.0.2", 9042));
-    BOOST_CHECK((*replicas)[1]->address() == cass::Address("1.0.0.3", 9042));
+    BOOST_REQUIRE(replicas && replicas->size() == 2);
+    BOOST_CHECK_EQUAL((*replicas)[0]->address(), cass::Address("1.0.0.2", 9042));
+    BOOST_CHECK_EQUAL((*replicas)[1]->address(), cass::Address("1.0.0.3", 9042));
   }
 
   ++host_to_remove_it;
-  token_map.remove_host(host_to_remove_it->second);
+  token_map->remove_host_and_build(host_to_remove_it->second);
 
   {
-    const cass::CopyOnWriteHostVec& replicas
-        = token_map.get_replicas("test", "abc");
+    const cass::CopyOnWriteHostVec& replicas = token_map->get_replicas("ks", "abc");
 
-    BOOST_REQUIRE(replicas->size() == 1);
-    BOOST_CHECK((*replicas)[0]->address() == cass::Address("1.0.0.3", 9042));
+    BOOST_REQUIRE(replicas && replicas->size() == 1);
+    BOOST_CHECK_EQUAL((*replicas)[0]->address(), cass::Address("1.0.0.3", 9042));
   }
 
   ++host_to_remove_it;
-  token_map.remove_host(host_to_remove_it->second);
+  token_map->remove_host_and_build(host_to_remove_it->second);
 
   {
-    const cass::CopyOnWriteHostVec& replicas = token_map.get_replicas("test", "abc");
+    const cass::CopyOnWriteHostVec& replicas = token_map->get_replicas("ks", "abc");
 
-    BOOST_REQUIRE(replicas->size() == 0);
+    BOOST_CHECK(!replicas);
   }
 }
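+
+// remove_host_and_build() drops a host's tokens and rebuilds the replica
+// sets in one step. With SimpleStrategy and rf = 2 a range's replicas are
+// its owner plus the next distinct host clockwise, which is why removing
+// 1.0.0.1 above shifts "abc" from {1.0.0.1, 1.0.0.2} to {1.0.0.2, 1.0.0.3}.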
 
-BOOST_AUTO_TEST_CASE(drop_keyspace)
+BOOST_AUTO_TEST_CASE(update_host)
 {
-  TestTokenMap<int64_t> test_drop_keyspace;
+  TestTokenMap<cass::Murmur3Partitioner> test_update_host;
 
-  test_drop_keyspace.strategy =
-      cass::SharedRefPtr<cass::ReplicationStrategy>(new cass::SimpleStrategy("", 2));
+  test_update_host.tokens[CASS_INT64_MIN / 2] = create_host("1.0.0.1");
+  test_update_host.tokens[CASS_INT64_MIN / 4] = create_host("1.0.0.2");
 
-  test_drop_keyspace.tokens[CASS_INT64_MIN / 2] = create_host("1.0.0.1");
-  test_drop_keyspace.tokens[0] = create_host("1.0.0.2");
-  test_drop_keyspace.tokens[CASS_INT64_MAX / 2] = create_host("1.0.0.3");
+  test_update_host.build("ks", 4);
+  test_update_host.verify();
+
+  cass::TokenMap* token_map = test_update_host.token_map.get();
+
+  {
+    const cass::CopyOnWriteHostVec& replicas = token_map->get_replicas("ks", "abc");
+
+    BOOST_REQUIRE(replicas && replicas->size() == 2);
+    BOOST_CHECK_EQUAL((*replicas)[0]->address(), cass::Address("1.0.0.1", 9042));
+    BOOST_CHECK_EQUAL((*replicas)[1]->address(), cass::Address("1.0.0.2", 9042));
+  }
 
-  test_drop_keyspace.build(cass::Murmur3Partitioner::PARTITIONER_CLASS, "test");
+  {
+    cass::Murmur3Partitioner::Token token = 0;
+    cass::Host::Ptr host(create_host("1.0.0.3"));
+
+    test_update_host.tokens[token] = host;
 
-  cass::TokenMap& token_map = test_drop_keyspace.token_map;
+    TokenCollectionBuilder builder;
+    builder.append_token(token);
+    token_map->update_host_and_build(host, builder.finish());
+  }
 
   {
-    const cass::CopyOnWriteHostVec& replicas
-        = token_map.get_replicas("test", "abc");
+    const cass::CopyOnWriteHostVec& replicas = token_map->get_replicas("ks", "abc");
 
-    BOOST_REQUIRE(replicas->size() == 2);
-    BOOST_CHECK((*replicas)[0]->address() == cass::Address("1.0.0.1", 9042));
-    BOOST_CHECK((*replicas)[1]->address() == cass::Address("1.0.0.2", 9042));
+    BOOST_REQUIRE(replicas && replicas->size() == 3);
+    BOOST_CHECK_EQUAL((*replicas)[0]->address(), cass::Address("1.0.0.1", 9042));
+    BOOST_CHECK_EQUAL((*replicas)[1]->address(), cass::Address("1.0.0.2", 9042));
+    BOOST_CHECK_EQUAL((*replicas)[2]->address(), cass::Address("1.0.0.3", 9042));
   }
 
-  token_map.drop_keyspace("test");
+  {
+    cass::Murmur3Partitioner::Token token = CASS_INT64_MAX / 2;
+    cass::Host::Ptr host(create_host("1.0.0.4"));
+
+    test_update_host.tokens[token] = host;
+
+    TokenCollectionBuilder builder;
+    builder.append_token(token);
+    token_map->update_host_and_build(host, builder.finish());
+  }
 
   {
-    const cass::CopyOnWriteHostVec& replicas
-        = token_map.get_replicas("test", "abc");
+    const cass::CopyOnWriteHostVec& replicas = token_map->get_replicas("ks", "abc");
 
-    BOOST_REQUIRE(replicas->size() == 0);
+    BOOST_REQUIRE(replicas && replicas->size() == 4);
+    BOOST_CHECK_EQUAL((*replicas)[0]->address(), cass::Address("1.0.0.1", 9042));
+    BOOST_CHECK_EQUAL((*replicas)[1]->address(), cass::Address("1.0.0.2", 9042));
+    BOOST_CHECK_EQUAL((*replicas)[2]->address(), cass::Address("1.0.0.3", 9042));
+    BOOST_CHECK_EQUAL((*replicas)[3]->address(), cass::Address("1.0.0.4", 9042));
   }
 }
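+
+// update_host_and_build() is the add-or-replace path: it registers the
+// host's tokens and rebuilds the replica sets. The keyspace above was
+// built with rf = 4, so each host added mid-test grows the replica set
+// for "abc" by one, in ring order starting from the key's token.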
+
+BOOST_AUTO_TEST_CASE(drop_keyspace)
+{
+  TestTokenMap<cass::Murmur3Partitioner> test_drop_keyspace;
+
+  test_drop_keyspace.tokens[CASS_INT64_MIN / 2] = create_host("1.0.0.1");
+  test_drop_keyspace.tokens[0]                  = create_host("1.0.0.2");
+  test_drop_keyspace.tokens[CASS_INT64_MAX / 2] = create_host("1.0.0.3");
+
+  test_drop_keyspace.build("ks", 2);
+  test_drop_keyspace.verify();
+
+  cass::TokenMap* token_map = test_drop_keyspace.token_map.get();
+
+  {
+    const cass::CopyOnWriteHostVec& replicas = token_map->get_replicas("ks", "abc");
+
+    BOOST_REQUIRE(replicas && replicas->size() == 2);
+    BOOST_CHECK_EQUAL((*replicas)[0]->address(), cass::Address("1.0.0.1", 9042));
+    BOOST_CHECK_EQUAL((*replicas)[1]->address(), cass::Address("1.0.0.2", 9042));
+  }
+
+  token_map->drop_keyspace("ks");
+
+  {
+    const cass::CopyOnWriteHostVec& replicas = token_map->get_replicas("ks", "abc");
+
+    BOOST_CHECK(!replicas);
+  }
+}
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/unit_tests/src/test_token_map_utils.hpp b/test/unit_tests/src/test_token_map_utils.hpp
new file mode 100644
index 000000000..140e86ebe
--- /dev/null
+++ b/test/unit_tests/src/test_token_map_utils.hpp
@@ -0,0 +1,327 @@
+/*
+  Copyright (c) 2014-2016 DataStax
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#ifndef __CASS_TEST_TOKEN_MAP_UTILS_HPP_INCLUDED__
+#define __CASS_TEST_TOKEN_MAP_UTILS_HPP_INCLUDED__
+
+#include "constants.hpp"
+#include "serialization.hpp"
+#include "token_map_impl.hpp"
+#include "uint128.hpp"
+
+#include "third_party/mt19937_64/mt19937_64.hpp"
+
+#define CASS_PROTOCOL_VERSION 3
+
+class BufferBuilder {
+public:
+  char* data() const {
+    return const_cast<char*>(buffer_.data());
+  }
+
+  size_t size() const {
+    return buffer_.size();
+  }
+
+  template <class T>
+  void append(T value) {
+    std::string buffer(size_of(value), 0);
+    encode(&buffer[0], value);
+    buffer_.append(buffer);
+  }
+
+  template <class T>
+  void append_value(T value) {
+    append<int32_t>(size_of(value));
+    append<T>(value);
+  }
+
+  void append_string(const std::string& str) {
+    append<uint16_t>(str.size());
+    append<std::string>(str);
+  }
+
+  template <class T>
+  void encode_at(size_t index, T value) {
+    assert(index < buffer_.size() && index + size_of(value) < buffer_.size());
+    encode(&buffer_[index], value);
+  }
+
+private:
+  static size_t size_of(uint16_t value) {
+    return sizeof(int16_t);
+  }
+
+  static size_t size_of(int32_t value) {
+    return sizeof(int32_t);
+  }
+
+  static size_t size_of(int64_t value) {
+    return sizeof(int64_t);
+  }
+
+  static size_t size_of(const std::string& value) {
+    return value.size();
+  }
+
+  static void encode(char* buf, uint16_t value) {
+    cass::encode_uint16(buf, value);
+  }
+
+  static void encode(char* buf, int32_t value) {
+    cass::encode_int32(buf, value);
+  }
+
+  static void encode(char* buf, int64_t value) {
+    cass::encode_int64(buf, value);
+  }
+
+  static void encode(char* buf, const std::string& value) {
+    memcpy(buf, value.data(), value.size());
+  }
+
+  static size_t size_of(cass::ByteOrderedPartitioner::Token value) {
+    return value.size();
+  }
+
+  static void encode(char* buf, cass::ByteOrderedPartitioner::Token value) {
+    memcpy(buf, value.data(), value.size());
+  }
+
+private:
+  std::string buffer_;
+};
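+
+// BufferBuilder frames data the way the native-protocol decoder expects:
+// append_value() writes an [int32 length][bytes] pair and append_string()
+// a [uint16 length][bytes] pair. For example (bytes shown big-endian,
+// purely illustrative):
+//
+//   BufferBuilder buf;
+//   buf.append_string("ks");               // 00 02 'k' 's'
+//   buf.append_value(std::string("dc1"));  // 00 00 00 03 'd' 'c' '1'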
+
+typedef std::map<std::string, std::string> ReplicationMap;
+
+struct ColumnMetadata {
+  ColumnMetadata(const std::string& name, const cass::DataType::ConstPtr& data_type)
+    : name(name)
+    , data_type(data_type) { }
+  std::string name;
+  cass::DataType::ConstPtr data_type;
+};
+
+typedef std::vector<ColumnMetadata> ColumnMetadataVec;
+
+class RowResultResponseBuilder : protected BufferBuilder {
+public:
+  RowResultResponseBuilder(const ColumnMetadataVec& column_metadata)
+    : row_count_(0) {
+    append<int32_t>(CASS_RESULT_KIND_ROWS); // Kind
+    append<int32_t>(CASS_RESULT_FLAG_GLOBAL_TABLESPEC); // Flags
+    append<int32_t>(column_metadata.size()); // Column count
+    append_string("keyspace");
+    append_string("table");
+
+    for (ColumnMetadataVec::const_iterator i = column_metadata.begin(),
+         end = column_metadata.end(); i != end; ++i) {
+      append_column_metadata(*i);
+    }
+
+    row_count_index_ = size();
+    append<int32_t>(0); // Row count (updated later)
+  }
+
+  void append_keyspace_row_v3(const std::string& keyspace_name,
+                              const ReplicationMap& replication) {
+    append_value(keyspace_name);
+
+    size_t size = sizeof(int32_t);
+    for (ReplicationMap::const_iterator i = replication.begin(),
+         end = replication.end(); i != end; ++i) {
+      size += sizeof(int32_t) + i->first.size();
+      size += sizeof(int32_t) + i->second.size();
+    }
+
+    append<int32_t>(size);
+    append<int32_t>(replication.size()); // Element count
+    for (ReplicationMap::const_iterator i = replication.begin(),
+         end = replication.end(); i != end; ++i) {
+      append_value(i->first);
+      append_value(i->second);
+    }
+
+    ++row_count_;
+  }
+
+  void append_keyspace_row_v2(const std::string& keyspace_name,
+                              const std::string& strategy_class,
+                              const std::string& strategy_options) {
+    append_value(keyspace_name);
+    append_value(strategy_class);
+    append_value(strategy_options);
+
+    ++row_count_;
+  }
+
+  void append_column_metadata(const ColumnMetadata& metadata) {
+    append_string(metadata.name);
+    append_data_type(metadata.data_type);
+  }
+
+  void append_data_type(const cass::DataType::ConstPtr& data_type) {
+    append<uint16_t>(data_type->value_type());
+
+    switch (data_type->value_type()) {
+      case CASS_VALUE_TYPE_LIST:
+      case CASS_VALUE_TYPE_SET:
+        append_data_type(cass::CollectionType::ConstPtr(data_type)->types()[0]);
+        break;
+      case CASS_VALUE_TYPE_MAP:
+        append_data_type(cass::CollectionType::ConstPtr(data_type)->types()[0]);
+        append_data_type(cass::CollectionType::ConstPtr(data_type)->types()[1]);
+        break;
+      case CASS_VALUE_TYPE_TUPLE:
+      case CASS_VALUE_TYPE_UDT:
+        assert(false && "Tuples and UDTs are not supported");
+        break;
+      default:
+        break;
+    }
+  }
+
+  cass::ResultResponse* finish() {
+    encode_at(row_count_index_, row_count_);
+    result_response_.decode(CASS_PROTOCOL_VERSION, data(), size());
+    return &result_response_;
+  }
+
+private:
+  cass::ResultResponse result_response_;
+  size_t row_count_index_;
+  int32_t row_count_;
+};
+
+class TokenCollectionBuilder : protected BufferBuilder {
+public:
+  TokenCollectionBuilder()
+    : count_(0) {
+    append<int32_t>(0); // Element count (updated later)
+  }
+
+  void append_token(cass::Murmur3Partitioner::Token token) {
+    std::stringstream ss;
+    ss << token;
+    append_value(ss.str());
+    ++count_;
+  }
+
+  void append_token(cass::RandomPartitioner::Token token) {
+    numeric::uint128_t r(token.lo);
+    r |= (numeric::uint128_t(token.hi) << 64);
+    append_value(r.to_string());
+    ++count_;
+  }
+
+  void append_token(cass::ByteOrderedPartitioner::Token token) {
+    append_value(token);
+    ++count_;
+  }
+
+  cass::Value* finish() {
+    encode_at(0, count_);
+    cass::CollectionType::ConstPtr data_type(
+        cass::CollectionType::list(
+            cass::DataType::ConstPtr(
+                new cass::DataType(CASS_VALUE_TYPE_VARINT)), false));
+    value_ = cass::Value(CASS_PROTOCOL_VERSION, data_type, data(), size());
+    return &value_;
+  }
+
+private:
+  cass::Value value_;
+  int32_t count_;
+};
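+
+// TokenCollectionBuilder fakes the value of a host's "tokens" column: a
+// list whose elements hold each token's textual form (e.g. a Murmur3 token
+// -42 is appended as the bytes of "-42"). finish() back-patches the element
+// count written at offset 0, then wraps the buffer in a cass::Value so it
+// can be handed straight to cass::TokenMap::add_host().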
+
+inline void add_keyspace_simple(const std::string& keyspace_name,
+                                size_t replication_factor,
+                                cass::TokenMap* token_map) {
+
+  cass::DataType::ConstPtr varchar_data_type(new cass::DataType(CASS_VALUE_TYPE_VARCHAR));
+
+  ColumnMetadataVec column_metadata;
+  column_metadata.push_back(ColumnMetadata("keyspace_name", varchar_data_type));
+  column_metadata.push_back(ColumnMetadata("replication", cass::CollectionType::map(varchar_data_type, varchar_data_type, true)));
+  RowResultResponseBuilder builder(column_metadata);
+
+  ReplicationMap replication;
+  replication["class"] = CASS_SIMPLE_STRATEGY;
+  std::stringstream ss;
+  ss << replication_factor;
+  replication["replication_factor"] = ss.str();
+  builder.append_keyspace_row_v3(keyspace_name, replication);
+  builder.finish();
+
+  token_map->add_keyspaces(cass::VersionNumber(3, 0, 0), builder.finish());
+}
+
+inline void add_keyspace_network_topology(const std::string& keyspace_name,
+                                          ReplicationMap& replication,
+                                          cass::TokenMap* token_map) {
+
+  cass::DataType::ConstPtr varchar_data_type(new cass::DataType(CASS_VALUE_TYPE_VARCHAR));
+
+  ColumnMetadataVec column_metadata;
+  column_metadata.push_back(ColumnMetadata("keyspace_name", varchar_data_type));
+  column_metadata.push_back(ColumnMetadata("replication", cass::CollectionType::map(varchar_data_type, varchar_data_type, true)));
+  RowResultResponseBuilder builder(column_metadata);
+
+  replication["class"] = CASS_NETWORK_TOPOLOGY_STRATEGY;
+  builder.append_keyspace_row_v3(keyspace_name, replication);
+  builder.finish();
+
+  token_map->add_keyspaces(cass::VersionNumber(3, 0, 0), builder.finish());
+}
+
+inline void add_murmur3_host(const cass::Host::Ptr& host,
+                             MT19937_64& rng,
+                             size_t num_tokens,
+                             cass::TokenMap* token_map) {
+  TokenCollectionBuilder builder;
+  for (size_t i = 0; i < num_tokens; ++i) {
+    builder.append_token(rng());
+  }
+  token_map->add_host(host, builder.finish());
+}
+
+inline cass::Host::Ptr create_host(const std::string& address,
+                                   const std::string& rack = "",
+                                   const std::string& dc = "") {
+  cass::Host::Ptr host(new cass::Host(cass::Address(address, 9042), false));
+  host->set_rack_and_dc(rack, dc);
+  return host;
+}
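+
+// Typical wiring of these helpers in a test (sketch; every call is defined
+// in this header or used in test_token_map.cpp above):
+//
+//   MT19937_64 rng;
+//   cass::ScopedPtr<cass::TokenMap> tm(
+//       cass::TokenMap::from_partitioner(cass::Murmur3Partitioner::name()));
+//   add_keyspace_simple("ks", 3, tm.get());
+//   add_murmur3_host(create_host("1.0.0.1"), rng, 256, tm.get());
+//   tm->build();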
+
+inline cass::RandomPartitioner::Token create_random_token(const std::string& s) {
+  cass::RandomPartitioner::Token token;
+  numeric::uint128_t i(s);
+  token.lo = (i & numeric::uint128_t("0xFFFFFFFFFFFFFFFF")).to_base_type();
+  token.hi = (i >> 64).to_base_type();
+  return token;
+}
+
+inline cass::ByteOrderedPartitioner::Token create_byte_ordered_token(const std::string& s) {
+  cass::ByteOrderedPartitioner::Token token;
+  for (std::string::const_iterator i = s.begin(),
+       end = s.end(); i != end; ++i) {
+    token.push_back(static_cast<uint8_t>(*i));
+  }
+  return token;
+}
+
+#endif
diff --git a/test/unit_tests/src/uint128.hpp b/test/unit_tests/src/uint128.hpp
new file mode 100644
index 000000000..9414d8b36
--- /dev/null
+++ b/test/unit_tests/src/uint128.hpp
@@ -0,0 +1,426 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Evan Teran
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef UINT128_20050119_H_
+#define UINT128_20050119_H_
+
+#include <climits>
+#include <cstdint>
+#include <ostream>
+#include <stdexcept>
+#include <string>
+
+#include <boost/operators.hpp>
+
+namespace numeric {
+
+namespace detail {
+template <class T>
+static void divide(const T &numerator, const T &denominator, T &quotient, T &remainder) {
+
+  static const int bits = sizeof(T) * CHAR_BIT;
+
+  if(denominator == 0) {
+    throw std::domain_error("divide by zero");
+  } else {
+    T n = numerator;
+    T d = denominator;
+    T x = 1;
+    T answer = 0;
+
+    while((n >= d) && (((d >> (bits - 1)) & 1) == 0)) {
+      x <<= 1;
+      d <<= 1;
+    }
+
+    while(x != 0) {
+      if(n >= d) {
+        n -= d;
+        answer |= x;
+      }
+
+      x >>= 1;
+      d >>= 1;
+    }
+
+    quotient = answer;
+    remainder = n;
+  }
+}
+}
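+
+// divide() above is plain binary long division: the divisor is doubled
+// until it passes the numerator, then halved step by step, subtracting
+// wherever it still fits. E.g. 7 / 2: d grows 2 -> 4 -> 8; walking back
+// down, 7 >= 4 sets quotient bit 1 (n = 3), then 3 >= 2 sets bit 0
+// (n = 1), giving quotient 3, remainder 1.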
+
+// convenience macro
+#define U128_C(s) uint128(#s)
+
+class uint128 : boost::operators<uint128>, boost::shiftable<uint128> {
+public:
+  typedef uint64_t base_type;
+
+public:
+  static const unsigned int size = (sizeof(base_type) + sizeof(base_type)) * CHAR_BIT;
+
+private:
+  base_type lo;
+  base_type hi;
+
+public:
+  // constructors for all basic types
+  uint128() : lo(0), hi(0) {}
+  uint128(int value) : lo(static_cast<base_type>(value)), hi(0) { if(value < 0) hi = static_cast<base_type>(-1); }
+  uint128(unsigned int value) : lo(static_cast<base_type>(value)), hi(0) {}
+  uint128(float value) : lo(static_cast<base_type>(value)), hi(0) {}
+  uint128(double value) : lo(static_cast<base_type>(value)), hi(0) {}
+  uint128(const uint128 &value) : lo(value.lo), hi(value.hi) {}
+  uint128(base_type value) : lo(value), hi(0) {}
+
+  uint128(const std::string &sz) : lo(0), hi(0) {
+
+    // do we have at least one character?
+    if(!sz.empty()) {
+      // make some reasonable assumptions
+      int radix = 10;
+      bool minus = false;
+
+      std::string::const_iterator i = sz.begin();
+
+      // check for minus sign, i suppose technically this should only apply
+      // to base 10, but who says that -0x1 should be invalid?
+      if(*i == '-') {
+        ++i;
+        minus = true;
+      }
+
+      // check if there is radix changing prefix (0 or 0x)
+      if(i != sz.end()) {
+        if(*i == '0') {
+          radix = 8;
+          ++i;
+          if(i != sz.end()) {
+            if(*i == 'x') {
+              radix = 16;
+              ++i;
+            }
+          }
+        }
+
+        while(i != sz.end()) {
+          unsigned int n;
+          const char ch = *i;
+
+          if(ch >= 'A' && ch <= 'Z') {
+            if(((ch - 'A') + 10) < radix) {
+              n = (ch - 'A') + 10;
+            } else {
+              break;
+            }
+          } else if(ch >= 'a' && ch <= 'z') {
+            if(((ch - 'a') + 10) < radix) {
+              n = (ch - 'a') + 10;
+            } else {
+              break;
+            }
+          } else if(ch >= '0' && ch <= '9') {
+            if((ch - '0') < radix) {
+              n = (ch - '0');
+            } else {
+              break;
+            }
+          } else {
+            /* completely invalid character */
+            break;
+          }
+
+          (*this) *= radix;
+          (*this) += n;
+
+          ++i;
+        }
+      }
+
+      // if this was a negative number, do that two's complement madness :-P
+      if(minus) {
+        *this = -*this;
+      }
+    }
+  }
+
+  uint128 &operator=(const uint128 &other) {
+    if(&other != this) {
+      lo = other.lo;
+      hi = other.hi;
+    }
+    return *this;
+  }
+
+public: // comparison operators
+
+  bool operator==(const uint128 &o) const {
+    return hi == o.hi && lo == o.lo;
+  }
+
+  bool operator<(const uint128 &o) const {
+    return (hi == o.hi) ? lo < o.lo : hi < o.hi;
+  }
+
+public: // unary operators
+
+  bool operator!() const {
+    return !(hi != 0 || lo != 0);
+  }
+
+  uint128 operator-() const {
+    // standard 2's complement negation
+    return ~uint128(*this) + 1;
+  }
+
+  uint128 operator~() const {
+    uint128 t(*this);
+    t.lo = ~t.lo;
+    t.hi = ~t.hi;
+    return t;
+  }
+
+  uint128 &operator++() {
+    if(++lo == 0) {
+      ++hi;
+    }
+    return *this;
+  }
+
+  uint128 &operator--() {
+    if(lo-- == 0) {
+      --hi;
+    }
+    return *this;
+  }
+
+public: // basic math operators
+
+  uint128 &operator+=(const uint128 &b) {
+    const base_type old_lo = lo;
+
+    lo += b.lo;
+    hi += b.hi;
+
+    if(lo < old_lo) {
+      ++hi;
+    }
+
+    return *this;
+  }
+
+  uint128 &operator-=(const uint128 &b) {
+    // it happens to be way easier to write it
+    // this way instead of making a subtraction algorithm
+    return *this += -b;
+  }
+
+  uint128 &operator*=(const uint128 &b) {
+
+    // check for multiply by 0
+    // result is always 0 :-P
+    if(b == 0) {
+      hi = 0;
+      lo = 0;
+    } else if(b != 1) {
+
+      // check we aren't multiplying by 1
+
+      uint128 a(*this);
+      uint128 t = b;
+
+      lo = 0;
+      hi = 0;
+
+      for (unsigned int i = 0; i < size; ++i) {
+        if((t & 1) != 0) {
+          *this += (a << i);
+        }
+
+        t >>= 1;
+      }
+    }
+
+    return *this;
+  }
+
+  uint128 &operator|=(const uint128 &b) {
+    hi |= b.hi;
+    lo |= b.lo;
+    return *this;
+  }
+
+  uint128 &operator&=(const uint128 &b) {
+    hi &= b.hi;
+    lo &= b.lo;
+    return *this;
+  }
+
+  uint128 &operator^=(const uint128 &b) {
+    hi ^= b.hi;
+    lo ^= b.lo;
+    return *this;
+  }
+
+  uint128 &operator/=(const uint128 &b) {
+    uint128 remainder;
+    detail::divide(*this, b, *this, remainder);
+    return *this;
+  }
+
+  uint128 &operator%=(const uint128 &b) {
+    uint128 quotient;
+    detail::divide(*this, b, quotient, *this);
+    return *this;
+  }
+
+  uint128 &operator<<=(const uint128& rhs) {
+
+    unsigned int n = rhs.to_integer();
+
+    if(n >= size) {
+      hi = 0;
+      lo = 0;
+    } else {
+      const unsigned int halfsize = size / 2;
+
+      if(n >= halfsize){
+        n -= halfsize;
+        hi = lo;
+        lo = 0;
+      }
+
+      if(n != 0) {
+        // shift high half
+        hi <<= n;
+
+        const base_type mask(~(base_type(-1) >> n));
+
+        // and add them to high half
+        hi |= (lo & mask) >> (halfsize - n);
+
+        // and finally shift also low half
+        lo <<= n;
+      }
+    }
+
+    return *this;
+  }
+
+  uint128 &operator>>=(const uint128& rhs) {
+
+    unsigned int n = rhs.to_integer();
+
+    if(n >= size) {
+      hi = 0;
+      lo = 0;
+    } else {
+      const unsigned int halfsize = size / 2;
+
+      if(n >= halfsize) {
+        n -= halfsize;
+        lo = hi;
+        hi = 0;
+      }
+
+      if(n != 0) {
+        // shift low half
+        lo >>= n;
+
+        // get lower N bits of high half
+        const base_type mask(~(base_type(-1) << n));
+
+        // and add them to low qword
+        lo |= (hi & mask) << (halfsize - n);
+
+        // and finally shift also high half
+        hi >>= n;
+      }
+    }
+
+    return *this;
+  }
+
+public:
+  int to_integer() const {
+    return static_cast<int>(lo);
+  }
+
+  base_type to_base_type() const {
+    return lo;
+  }
+
+  std::string to_string(unsigned int radix = 10) const {
+    if(*this == 0) {
+      return "0";
+    }
+
+    if(radix < 2 || radix > 36) {
+      return "(invalid radix)";
+    }
+
+    // at worst it will be size digits (base 2) so make our buffer
+    // that plus room for null terminator
+    static char sz[size + 1];
+    sz[sizeof(sz) - 1] = '\0';
+
+    uint128 ii(*this);
+    int i = size - 1;
+
+    while (ii != 0 && i) {
+      uint128 remainder;
+      detail::divide(ii, uint128(radix), ii, remainder);
+      sz[--i] = "0123456789abcdefghijklmnopqrstuvwxyz"[remainder.to_integer()];
+    }
+
+    return &sz[i];
+  }
+};
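+
+// Usage sketch: construction from decimal or 0x-prefixed strings plus the
+// usual arithmetic via boost::operators, e.g.
+//
+//   numeric::uint128_t x("0x10000000000000000");  // 2^64: hi == 1, lo == 0
+//   x -= 1;                                       // lo == 0xFFFFFFFFFFFFFFFF
+//   x.to_string(16);                              // "ffffffffffffffff"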
+
+inline std::ostream &operator<<(std::ostream &o, const uint128 &n) {
+  switch(o.flags() & (std::ios_base::hex | std::ios_base::dec | std::ios_base::oct)) {
+  case std::ios_base::hex:
+    o << n.to_string(16);
+    break;
+  case std::ios_base::dec:
+    o << n.to_string(10);
+    break;
+  case std::ios_base::oct:
+    o << n.to_string(8);
+    break;
+  default:
+    o << n.to_string();
+    break;
+  }
+  return o;
+}
+
+typedef uint128 uint128_t;
+
+}
+
+#endif